Merge branch 'exynos-drm-next' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-next

Inki writes:
      This is the initial pull request for Exynos. It includes a big
       change that makes drm_display_mode timing parameters common to
       exynos4 and exynos5 and cleans up unnecessary code. It also adds
       device tree support for fimd so that timing values and the
       interrupt source are taken from the dts file.
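       As a rough sketch (not code from this series), the common video
       helpers let a driver build a drm_display_mode straight from such a
       display-timings node. of_get_videomode() and
       drm_display_mode_from_videomode() are the helpers assumed here;
       example_mode_from_dt() is a made-up wrapper, and header locations
       vary by kernel version:

	#include <video/of_videomode.h>	/* of_get_videomode(), OF_USE_NATIVE_MODE */
	#include <video/videomode.h>
	#include <drm/drmP.h>

	static int example_mode_from_dt(struct device_node *np,
					struct drm_display_mode *mode)
	{
		struct videomode vm;
		int ret;

		/* Pull the native timing out of the display-timings node */
		ret = of_get_videomode(np, &vm, OF_USE_NATIVE_MODE);
		if (ret < 0)
			return ret;

		/* Fill in the drm_display_mode shared by exynos4 and exynos5 */
		drm_display_mode_from_videomode(&vm, mode);
		return 0;
	}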

       In addition, one more patch, device tree support for Exynos FIMC,
       is still under review. It was posted a short while ago:
            http://www.mail-archive.com/linux-samsung-soc@vger.kernel.org/msg17568.html

       So we will send one more pull request once the review is complete.

* 'exynos-drm-next' of git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos:
  drm/exynos: prepare FIMD clocks
  Revert "of/exynos_g2d: Add Bindings for exynos G2D driver"
  drm/exynos: drm_connector: Fix error check condition
  drm/exynos: drm_rotator: Fix incorrect usage of IS_ERR_OR_NULL
  drm/exynos: mixer: Fix incorrect usage of IS_ERR_OR_NULL
  drm/exynos: hdmi: Fix incorrect usage of IS_ERR_OR_NULL
  drm/exynos: change the method for getting the interrupt
  drm/exynos: enable OF_VIDEOMODE and FB_MODE_HELPERS for exynos drm fimd
  drm/exynos: Add display-timing node parsing using video helper function
  drm/exynos: hdmi: move mode_fixup to drm common hdmi
  drm/exynos: hdmi: using drm_display_mode timings for exynos4
diff --git a/drivers/gpu/drm/gma500/Kconfig b/drivers/gpu/drm/gma500/Kconfig
index 1188f0f..1f6e2df 100644
--- a/drivers/gpu/drm/gma500/Kconfig
+++ b/drivers/gpu/drm/gma500/Kconfig
@@ -2,10 +2,15 @@
 	tristate "Intel GMA5/600 KMS Framebuffer"
 	depends on DRM && PCI && X86
 	select FB_CFB_COPYAREA
-        select FB_CFB_FILLRECT
-        select FB_CFB_IMAGEBLIT
-        select DRM_KMS_HELPER
-        select DRM_TTM
+	select FB_CFB_FILLRECT
+	select FB_CFB_IMAGEBLIT
+	select DRM_KMS_HELPER
+	select DRM_TTM
+	# GMA500 depends on ACPI_VIDEO when ACPI is enabled, just like i915
+	select ACPI_VIDEO if ACPI
+	select BACKLIGHT_CLASS_DEVICE if ACPI
+	select VIDEO_OUTPUT_CONTROL if ACPI
+	select INPUT if ACPI
 	help
 	  Say yes for an experimental 2D KMS framebuffer driver for the
 	  Intel GMA500 ('Poulsbo') and other Intel IMG based graphics
diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c
index 8c17534..7b8386f 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_crt.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c
@@ -276,6 +276,7 @@
 		goto failed_connector;
 
 	connector = &psb_intel_connector->base;
+	connector->polled = DRM_CONNECTOR_POLL_HPD;
 	drm_connector_init(dev, connector,
 		&cdv_intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
 
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index e223b50..464153d 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -319,6 +319,7 @@
 		goto err_priv;
 
 	connector = &psb_intel_connector->base;
+	connector->polled = DRM_CONNECTOR_POLL_HPD;
 	encoder = &psb_intel_encoder->base;
 	drm_connector_init(dev, connector,
 			   &cdv_hdmi_connector_funcs,
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 2590cac..31ac392 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -431,7 +431,7 @@
 	fbdev->psb_fb_helper.fbdev = info;
 
 	drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth);
-	strcpy(info->fix.id, "psbfb");
+	strcpy(info->fix.id, "psbdrmfb");
 
 	info->flags = FBINFO_DEFAULT;
 	if (dev_priv->ops->accel_2d && pitch_lines > 8)	/* 2D engine */
diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c
index 054e26e..1f82183 100644
--- a/drivers/gpu/drm/gma500/gtt.c
+++ b/drivers/gpu/drm/gma500/gtt.c
@@ -80,7 +80,8 @@
  *	the GTT. This is protected via the gtt mutex which the caller
  *	must hold.
  */
-static int psb_gtt_insert(struct drm_device *dev, struct gtt_range *r)
+static int psb_gtt_insert(struct drm_device *dev, struct gtt_range *r,
+			  int resume)
 {
 	u32 __iomem *gtt_slot;
 	u32 pte;
@@ -97,8 +98,10 @@
 	gtt_slot = psb_gtt_entry(dev, r);
 	pages = r->pages;
 
-	/* Make sure changes are visible to the GPU */
-	set_pages_array_wc(pages, r->npage);
+	if (!resume) {
+		/* Make sure changes are visible to the GPU */
+		set_pages_array_wc(pages, r->npage);
+	}
 
 	/* Write our page entries into the GTT itself */
 	for (i = r->roll; i < r->npage; i++) {
@@ -269,7 +272,7 @@
 		ret = psb_gtt_attach_pages(gt);
 		if (ret < 0)
 			goto out;
-		ret = psb_gtt_insert(dev, gt);
+		ret = psb_gtt_insert(dev, gt, 0);
 		if (ret < 0) {
 			psb_gtt_detach_pages(gt);
 			goto out;
@@ -421,9 +424,11 @@
 	int ret = 0;
 	uint32_t pte;
 
-	mutex_init(&dev_priv->gtt_mutex);
+	if (!resume) {
+		mutex_init(&dev_priv->gtt_mutex);
+		psb_gtt_alloc(dev);
+	}
 
-	psb_gtt_alloc(dev);
 	pg = &dev_priv->gtt;
 
 	/* Enable the GTT */
@@ -505,7 +510,8 @@
 	/*
 	 *	Map the GTT and the stolen memory area
 	 */
-	dev_priv->gtt_map = ioremap_nocache(pg->gtt_phys_start,
+	if (!resume)
+		dev_priv->gtt_map = ioremap_nocache(pg->gtt_phys_start,
 						gtt_pages << PAGE_SHIFT);
 	if (!dev_priv->gtt_map) {
 		dev_err(dev->dev, "Failure to map gtt.\n");
@@ -513,7 +519,9 @@
 		goto out_err;
 	}
 
-	dev_priv->vram_addr = ioremap_wc(dev_priv->stolen_base, stolen_size);
+	if (!resume)
+		dev_priv->vram_addr = ioremap_wc(dev_priv->stolen_base,
+						 stolen_size);
 	if (!dev_priv->vram_addr) {
 		dev_err(dev->dev, "Failure to map stolen base.\n");
 		ret = -ENOMEM;
@@ -549,3 +557,31 @@
 	psb_gtt_takedown(dev);
 	return ret;
 }
+
+int psb_gtt_restore(struct drm_device *dev)
+{
+	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct resource *r = dev_priv->gtt_mem->child;
+	struct gtt_range *range;
+	unsigned int restored = 0, total = 0, size = 0;
+
+	/* On resume, the gtt_mutex is already initialized */
+	mutex_lock(&dev_priv->gtt_mutex);
+	psb_gtt_init(dev, 1);
+
+	while (r != NULL) {
+		range = container_of(r, struct gtt_range, resource);
+		if (range->pages) {
+			psb_gtt_insert(dev, range, 1);
+			size += range->resource.end - range->resource.start;
+			restored++;
+		}
+		r = r->sibling;
+		total++;
+	}
+	mutex_unlock(&dev_priv->gtt_mutex);
+	DRM_DEBUG_DRIVER("Restored %u of %u gtt ranges (%u KB)", restored,
+			 total, (size / 1024));
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/gma500/gtt.h b/drivers/gpu/drm/gma500/gtt.h
index aa17423..6191d10 100644
--- a/drivers/gpu/drm/gma500/gtt.h
+++ b/drivers/gpu/drm/gma500/gtt.h
@@ -60,5 +60,5 @@
 extern void psb_gtt_unpin(struct gtt_range *gt);
 extern void psb_gtt_roll(struct drm_device *dev,
 					struct gtt_range *gt, int roll);
-
+extern int psb_gtt_restore(struct drm_device *dev);
 #endif
diff --git a/drivers/gpu/drm/gma500/intel_bios.c b/drivers/gpu/drm/gma500/intel_bios.c
index 403fffb..d349734 100644
--- a/drivers/gpu/drm/gma500/intel_bios.c
+++ b/drivers/gpu/drm/gma500/intel_bios.c
@@ -218,12 +218,11 @@
 	bl_start = find_section(bdb, BDB_LVDS_BACKLIGHT);
 	vbt_lvds_bl = (struct bdb_lvds_backlight *)(bl_start + 1) + p_type;
 
-	lvds_bl = kzalloc(sizeof(*vbt_lvds_bl), GFP_KERNEL);
+	lvds_bl = kmemdup(vbt_lvds_bl, sizeof(*vbt_lvds_bl), GFP_KERNEL);
 	if (!lvds_bl) {
 		dev_err(dev_priv->dev->dev, "out of memory for backlight data\n");
 		return;
 	}
-	memcpy(lvds_bl, vbt_lvds_bl, sizeof(*vbt_lvds_bl));
 	dev_priv->lvds_bl = lvds_bl;
 }
 
diff --git a/drivers/gpu/drm/gma500/intel_bios.h b/drivers/gpu/drm/gma500/intel_bios.h
index c6267c9..978ae4b 100644
--- a/drivers/gpu/drm/gma500/intel_bios.h
+++ b/drivers/gpu/drm/gma500/intel_bios.h
@@ -19,8 +19,8 @@
  *
  */
 
-#ifndef _I830_BIOS_H_
-#define _I830_BIOS_H_
+#ifndef _INTEL_BIOS_H_
+#define _INTEL_BIOS_H_
 
 #include <drm/drmP.h>
 #include <drm/drm_dp_helper.h>
@@ -618,4 +618,4 @@
 #define		PORT_IDPC	8
 #define		PORT_IDPD	9
 
-#endif /* _I830_BIOS_H_ */
+#endif /* _INTEL_BIOS_H_ */
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
index 2d4ab48..3abf831 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
@@ -92,8 +92,8 @@
 {
 	struct mdfld_dsi_pkg_sender *sender =
 				mdfld_dsi_get_pkg_sender(dsi_config);
-	struct drm_device *dev = sender->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct drm_device *dev;
+	struct drm_psb_private *dev_priv;
 	u32 gen_ctrl_val;
 
 	if (!sender) {
@@ -101,6 +101,9 @@
 		return;
 	}
 
+	dev = sender->dev;
+	dev_priv = dev->dev_private;
+
 	/* Set default display backlight value to 85% (0xd8)*/
 	mdfld_dsi_send_mcs_short(sender, write_display_brightness, 0xd8, 1,
 				true);
diff --git a/drivers/gpu/drm/gma500/power.c b/drivers/gpu/drm/gma500/power.c
index 889b854..b6b135f 100644
--- a/drivers/gpu/drm/gma500/power.c
+++ b/drivers/gpu/drm/gma500/power.c
@@ -110,6 +110,8 @@
 	PSB_WVDC32(dev_priv->pge_ctl | _PSB_PGETBL_ENABLED, PSB_PGETBL_CTL);
 	pci_write_config_word(pdev, PSB_GMCH_CTRL,
 			dev_priv->gmch_ctrl | _PSB_GMCH_ENABLED);
+
+	psb_gtt_restore(dev); /* Rebuild our GTT mappings */
 	dev_priv->ops->restore_regs(dev);
 }
 
@@ -313,3 +315,18 @@
 	else
 		return 1;
 }
+
+int gma_power_thaw(struct device *_dev)
+{
+	return gma_power_resume(_dev);
+}
+
+int gma_power_freeze(struct device *_dev)
+{
+	return gma_power_suspend(_dev);
+}
+
+int gma_power_restore(struct device *_dev)
+{
+	return gma_power_resume(_dev);
+}
diff --git a/drivers/gpu/drm/gma500/power.h b/drivers/gpu/drm/gma500/power.h
index 1969d2e..56d8708 100644
--- a/drivers/gpu/drm/gma500/power.h
+++ b/drivers/gpu/drm/gma500/power.h
@@ -41,6 +41,9 @@
  */
 int gma_power_suspend(struct device *dev);
 int gma_power_resume(struct device *dev);
+int gma_power_thaw(struct device *dev);
+int gma_power_freeze(struct device *dev);
+int gma_power_restore(struct device *_dev);
 
 /*
  * These are the functions the driver should use to wrap all hw access
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 111e3df..bddea58 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -601,6 +601,9 @@
 static const struct dev_pm_ops psb_pm_ops = {
 	.resume = gma_power_resume,
 	.suspend = gma_power_suspend,
+	.thaw = gma_power_thaw,
+	.freeze = gma_power_freeze,
+	.restore = gma_power_restore,
 	.runtime_suspend = psb_runtime_suspend,
 	.runtime_resume = psb_runtime_resume,
 	.runtime_idle = psb_runtime_idle,
diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h
index a7fd6c4..6053b8a 100644
--- a/drivers/gpu/drm/gma500/psb_drv.h
+++ b/drivers/gpu/drm/gma500/psb_drv.h
@@ -876,7 +876,6 @@
 #define PSB_D_MSVDX   (1 << 9)
 #define PSB_D_TOPAZ   (1 << 10)
 
-extern int drm_psb_no_fb;
 extern int drm_idle_check_interval;
 
 /*
diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c
index 9edb190..6e8f42b 100644
--- a/drivers/gpu/drm/gma500/psb_intel_display.c
+++ b/drivers/gpu/drm/gma500/psb_intel_display.c
@@ -50,119 +50,41 @@
 	int p2_slow, p2_fast;
 };
 
-#define INTEL_P2_NUM		      2
-
 struct psb_intel_limit_t {
 	struct psb_intel_range_t dot, vco, n, m, m1, m2, p, p1;
 	struct psb_intel_p2_t p2;
 };
 
-#define I8XX_DOT_MIN		  25000
-#define I8XX_DOT_MAX		 350000
-#define I8XX_VCO_MIN		 930000
-#define I8XX_VCO_MAX		1400000
-#define I8XX_N_MIN		      3
-#define I8XX_N_MAX		     16
-#define I8XX_M_MIN		     96
-#define I8XX_M_MAX		    140
-#define I8XX_M1_MIN		     18
-#define I8XX_M1_MAX		     26
-#define I8XX_M2_MIN		      6
-#define I8XX_M2_MAX		     16
-#define I8XX_P_MIN		      4
-#define I8XX_P_MAX		    128
-#define I8XX_P1_MIN		      2
-#define I8XX_P1_MAX		     33
-#define I8XX_P1_LVDS_MIN	      1
-#define I8XX_P1_LVDS_MAX	      6
-#define I8XX_P2_SLOW		      4
-#define I8XX_P2_FAST		      2
-#define I8XX_P2_LVDS_SLOW	      14
-#define I8XX_P2_LVDS_FAST	      14	/* No fast option */
-#define I8XX_P2_SLOW_LIMIT	 165000
-
-#define I9XX_DOT_MIN		  20000
-#define I9XX_DOT_MAX		 400000
-#define I9XX_VCO_MIN		1400000
-#define I9XX_VCO_MAX		2800000
-#define I9XX_N_MIN		      1
-#define I9XX_N_MAX		      6
-#define I9XX_M_MIN		     70
-#define I9XX_M_MAX		    120
-#define I9XX_M1_MIN		      8
-#define I9XX_M1_MAX		     18
-#define I9XX_M2_MIN		      3
-#define I9XX_M2_MAX		      7
-#define I9XX_P_SDVO_DAC_MIN	      5
-#define I9XX_P_SDVO_DAC_MAX	     80
-#define I9XX_P_LVDS_MIN		      7
-#define I9XX_P_LVDS_MAX		     98
-#define I9XX_P1_MIN		      1
-#define I9XX_P1_MAX		      8
-#define I9XX_P2_SDVO_DAC_SLOW		     10
-#define I9XX_P2_SDVO_DAC_FAST		      5
-#define I9XX_P2_SDVO_DAC_SLOW_LIMIT	 200000
-#define I9XX_P2_LVDS_SLOW		     14
-#define I9XX_P2_LVDS_FAST		      7
-#define I9XX_P2_LVDS_SLOW_LIMIT		 112000
-
-#define INTEL_LIMIT_I8XX_DVO_DAC    0
-#define INTEL_LIMIT_I8XX_LVDS	    1
-#define INTEL_LIMIT_I9XX_SDVO_DAC   2
-#define INTEL_LIMIT_I9XX_LVDS	    3
+#define INTEL_LIMIT_I9XX_SDVO_DAC   0
+#define INTEL_LIMIT_I9XX_LVDS	    1
 
 static const struct psb_intel_limit_t psb_intel_limits[] = {
-	{			/* INTEL_LIMIT_I8XX_DVO_DAC */
-	 .dot = {.min = I8XX_DOT_MIN, .max = I8XX_DOT_MAX},
-	 .vco = {.min = I8XX_VCO_MIN, .max = I8XX_VCO_MAX},
-	 .n = {.min = I8XX_N_MIN, .max = I8XX_N_MAX},
-	 .m = {.min = I8XX_M_MIN, .max = I8XX_M_MAX},
-	 .m1 = {.min = I8XX_M1_MIN, .max = I8XX_M1_MAX},
-	 .m2 = {.min = I8XX_M2_MIN, .max = I8XX_M2_MAX},
-	 .p = {.min = I8XX_P_MIN, .max = I8XX_P_MAX},
-	 .p1 = {.min = I8XX_P1_MIN, .max = I8XX_P1_MAX},
-	 .p2 = {.dot_limit = I8XX_P2_SLOW_LIMIT,
-		.p2_slow = I8XX_P2_SLOW, .p2_fast = I8XX_P2_FAST},
-	 },
-	{			/* INTEL_LIMIT_I8XX_LVDS */
-	 .dot = {.min = I8XX_DOT_MIN, .max = I8XX_DOT_MAX},
-	 .vco = {.min = I8XX_VCO_MIN, .max = I8XX_VCO_MAX},
-	 .n = {.min = I8XX_N_MIN, .max = I8XX_N_MAX},
-	 .m = {.min = I8XX_M_MIN, .max = I8XX_M_MAX},
-	 .m1 = {.min = I8XX_M1_MIN, .max = I8XX_M1_MAX},
-	 .m2 = {.min = I8XX_M2_MIN, .max = I8XX_M2_MAX},
-	 .p = {.min = I8XX_P_MIN, .max = I8XX_P_MAX},
-	 .p1 = {.min = I8XX_P1_LVDS_MIN, .max = I8XX_P1_LVDS_MAX},
-	 .p2 = {.dot_limit = I8XX_P2_SLOW_LIMIT,
-		.p2_slow = I8XX_P2_LVDS_SLOW, .p2_fast = I8XX_P2_LVDS_FAST},
-	 },
 	{			/* INTEL_LIMIT_I9XX_SDVO_DAC */
-	 .dot = {.min = I9XX_DOT_MIN, .max = I9XX_DOT_MAX},
-	 .vco = {.min = I9XX_VCO_MIN, .max = I9XX_VCO_MAX},
-	 .n = {.min = I9XX_N_MIN, .max = I9XX_N_MAX},
-	 .m = {.min = I9XX_M_MIN, .max = I9XX_M_MAX},
-	 .m1 = {.min = I9XX_M1_MIN, .max = I9XX_M1_MAX},
-	 .m2 = {.min = I9XX_M2_MIN, .max = I9XX_M2_MAX},
-	 .p = {.min = I9XX_P_SDVO_DAC_MIN, .max = I9XX_P_SDVO_DAC_MAX},
-	 .p1 = {.min = I9XX_P1_MIN, .max = I9XX_P1_MAX},
-	 .p2 = {.dot_limit = I9XX_P2_SDVO_DAC_SLOW_LIMIT,
-		.p2_slow = I9XX_P2_SDVO_DAC_SLOW, .p2_fast =
-		I9XX_P2_SDVO_DAC_FAST},
+	 .dot = {.min = 20000, .max = 400000},
+	 .vco = {.min = 1400000, .max = 2800000},
+	 .n = {.min = 1, .max = 6},
+	 .m = {.min = 70, .max = 120},
+	 .m1 = {.min = 8, .max = 18},
+	 .m2 = {.min = 3, .max = 7},
+	 .p = {.min = 5, .max = 80},
+	 .p1 = {.min = 1, .max = 8},
+	 .p2 = {.dot_limit = 200000,
+		.p2_slow = 10, .p2_fast = 5},
 	 },
 	{			/* INTEL_LIMIT_I9XX_LVDS */
-	 .dot = {.min = I9XX_DOT_MIN, .max = I9XX_DOT_MAX},
-	 .vco = {.min = I9XX_VCO_MIN, .max = I9XX_VCO_MAX},
-	 .n = {.min = I9XX_N_MIN, .max = I9XX_N_MAX},
-	 .m = {.min = I9XX_M_MIN, .max = I9XX_M_MAX},
-	 .m1 = {.min = I9XX_M1_MIN, .max = I9XX_M1_MAX},
-	 .m2 = {.min = I9XX_M2_MIN, .max = I9XX_M2_MAX},
-	 .p = {.min = I9XX_P_LVDS_MIN, .max = I9XX_P_LVDS_MAX},
-	 .p1 = {.min = I9XX_P1_MIN, .max = I9XX_P1_MAX},
+	 .dot = {.min = 20000, .max = 400000},
+	 .vco = {.min = 1400000, .max = 2800000},
+	 .n = {.min = 1, .max = 6},
+	 .m = {.min = 70, .max = 120},
+	 .m1 = {.min = 8, .max = 18},
+	 .m2 = {.min = 3, .max = 7},
+	 .p = {.min = 7, .max = 98},
+	 .p1 = {.min = 1, .max = 8},
 	 /* The single-channel range is 25-112Mhz, and dual-channel
 	  * is 80-224Mhz.  Prefer single channel as much as possible.
 	  */
-	 .p2 = {.dot_limit = I9XX_P2_LVDS_SLOW_LIMIT,
-		.p2_slow = I9XX_P2_LVDS_SLOW, .p2_fast = I9XX_P2_LVDS_FAST},
+	 .p2 = {.dot_limit = 112000,
+		.p2_slow = 14, .p2_fast = 7},
 	 },
 };
 
@@ -177,9 +99,7 @@
 	return limit;
 }
 
-/** Derive the pixel clock for the given refclk and divisors for 8xx chips. */
-
-static void i8xx_clock(int refclk, struct psb_intel_clock_t *clock)
+static void psb_intel_clock(int refclk, struct psb_intel_clock_t *clock)
 {
 	clock->m = 5 * (clock->m1 + 2) + (clock->m2 + 2);
 	clock->p = clock->p1 * clock->p2;
@@ -187,22 +107,6 @@
 	clock->dot = clock->vco / clock->p;
 }
 
-/** Derive the pixel clock for the given refclk and divisors for 9xx chips. */
-
-static void i9xx_clock(int refclk, struct psb_intel_clock_t *clock)
-{
-	clock->m = 5 * (clock->m1 + 2) + (clock->m2 + 2);
-	clock->p = clock->p1 * clock->p2;
-	clock->vco = refclk * clock->m / (clock->n + 2);
-	clock->dot = clock->vco / clock->p;
-}
-
-static void psb_intel_clock(struct drm_device *dev, int refclk,
-			struct psb_intel_clock_t *clock)
-{
-	return i9xx_clock(refclk, clock);
-}
-
 /**
  * Returns whether any output on the specified pipe is of the specified type
  */
@@ -308,7 +212,7 @@
 				     clock.p1++) {
 					int this_err;
 
-					psb_intel_clock(dev, refclk, &clock);
+					psb_intel_clock(refclk, &clock);
 
 					if (!psb_intel_PLL_is_valid
 					    (crtc, &clock))
@@ -1068,7 +972,7 @@
 	return 0;
 }
 
-void psb_intel_crtc_gamma_set(struct drm_crtc *crtc, u16 *red,
+static void psb_intel_crtc_gamma_set(struct drm_crtc *crtc, u16 *red,
 			 u16 *green, u16 *blue, uint32_t type, uint32_t size)
 {
 	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
@@ -1149,9 +1053,9 @@
 		if ((dpll & PLL_REF_INPUT_MASK) ==
 		    PLLB_REF_INPUT_SPREADSPECTRUMIN) {
 			/* XXX: might not be 66MHz */
-			i8xx_clock(66000, &clock);
+			psb_intel_clock(66000, &clock);
 		} else
-			i8xx_clock(48000, &clock);
+			psb_intel_clock(48000, &clock);
 	} else {
 		if (dpll & PLL_P1_DIVIDE_BY_TWO)
 			clock.p1 = 2;
@@ -1166,7 +1070,7 @@
 		else
 			clock.p2 = 2;
 
-		i8xx_clock(48000, &clock);
+		psb_intel_clock(48000, &clock);
 	}
 
 	/* XXX: It would be nice to validate the clocks, but we can't reuse
@@ -1225,7 +1129,7 @@
 	return mode;
 }
 
-void psb_intel_crtc_destroy(struct drm_crtc *crtc)
+static void psb_intel_crtc_destroy(struct drm_crtc *crtc)
 {
 	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
 	struct gtt_range *gt;
diff --git a/drivers/gpu/drm/gma500/psb_intel_display.h b/drivers/gpu/drm/gma500/psb_intel_display.h
index 535b49a..3724b97 100644
--- a/drivers/gpu/drm/gma500/psb_intel_display.h
+++ b/drivers/gpu/drm/gma500/psb_intel_display.h
@@ -21,8 +21,5 @@
 #define _INTEL_DISPLAY_H_
 
 bool psb_intel_pipe_has_type(struct drm_crtc *crtc, int type);
-void psb_intel_crtc_gamma_set(struct drm_crtc *crtc, u16 *red,
-			 u16 *green, u16 *blue, uint32_t type, uint32_t size);
-void psb_intel_crtc_destroy(struct drm_crtc *crtc);
 
 #endif
diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h
index 90f2d11..4dcae42 100644
--- a/drivers/gpu/drm/gma500/psb_intel_drv.h
+++ b/drivers/gpu/drm/gma500/psb_intel_drv.h
@@ -32,9 +32,6 @@
 /* maximum connectors per crtcs in the mode set */
 #define INTELFB_CONN_LIMIT 4
 
-#define INTEL_I2C_BUS_DVO 1
-#define INTEL_I2C_BUS_SDVO 2
-
 /* Intel Pipe Clone Bit */
 #define INTEL_HDMIB_CLONE_BIT 1
 #define INTEL_HDMIC_CLONE_BIT 2
@@ -68,11 +65,6 @@
 #define INTEL_OUTPUT_DISPLAYPORT 9
 #define INTEL_OUTPUT_EDP 10
 
-#define INTEL_DVO_CHIP_NONE 0
-#define INTEL_DVO_CHIP_LVDS 1
-#define INTEL_DVO_CHIP_TMDS 2
-#define INTEL_DVO_CHIP_TVOUT 4
-
 #define INTEL_MODE_PIXEL_MULTIPLIER_SHIFT (0x0)
 #define INTEL_MODE_PIXEL_MULTIPLIER_MASK (0xf << INTEL_MODE_PIXEL_MULTIPLIER_SHIFT)
 
diff --git a/drivers/gpu/drm/gma500/psb_intel_reg.h b/drivers/gpu/drm/gma500/psb_intel_reg.h
index d914719..0be30e4 100644
--- a/drivers/gpu/drm/gma500/psb_intel_reg.h
+++ b/drivers/gpu/drm/gma500/psb_intel_reg.h
@@ -493,7 +493,6 @@
 #define PIPEACONF_DISABLE		0
 #define PIPEACONF_DOUBLE_WIDE		(1 << 30)
 #define PIPECONF_ACTIVE			(1 << 30)
-#define I965_PIPECONF_ACTIVE		(1 << 30)
 #define PIPECONF_DSIPLL_LOCK		(1 << 29)
 #define PIPEACONF_SINGLE_WIDE		0
 #define PIPEACONF_PIPE_UNLOCKED		0
diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index a4cc777a..19e3660 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
@@ -134,6 +134,9 @@
 
 	/* Input timings for adjusted_mode */
 	struct psb_intel_sdvo_dtd input_dtd;
+
+	/* Saved SDVO output states */
+	uint32_t saveSDVO; /* Can be SDVOB or SDVOC depending on sdvo_reg */
 };
 
 struct psb_intel_sdvo_connector {
@@ -1830,6 +1833,34 @@
 #undef CHECK_PROPERTY
 }
 
+static void psb_intel_sdvo_save(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct psb_intel_encoder *psb_intel_encoder =
+					psb_intel_attached_encoder(connector);
+	struct psb_intel_sdvo *sdvo =
+				to_psb_intel_sdvo(&psb_intel_encoder->base);
+
+	sdvo->saveSDVO = REG_READ(sdvo->sdvo_reg);
+}
+
+static void psb_intel_sdvo_restore(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_encoder *encoder =
+				&psb_intel_attached_encoder(connector)->base;
+	struct psb_intel_sdvo *sdvo = to_psb_intel_sdvo(encoder);
+	struct drm_crtc *crtc = encoder->crtc;
+
+	REG_WRITE(sdvo->sdvo_reg, sdvo->saveSDVO);
+
+	/* Force a full mode set on the crtc. We're supposed to have the
+	   mode_config lock already. */
+	if (connector->status == connector_status_connected)
+		drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x, crtc->y,
+					 NULL);
+}
+
 static const struct drm_encoder_helper_funcs psb_intel_sdvo_helper_funcs = {
 	.dpms = psb_intel_sdvo_dpms,
 	.mode_fixup = psb_intel_sdvo_mode_fixup,
@@ -1840,6 +1871,8 @@
 
 static const struct drm_connector_funcs psb_intel_sdvo_connector_funcs = {
 	.dpms = drm_helper_connector_dpms,
+	.save = psb_intel_sdvo_save,
+	.restore = psb_intel_sdvo_restore,
 	.detect = psb_intel_sdvo_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = psb_intel_sdvo_set_property,
diff --git a/drivers/gpu/drm/gma500/psb_irq.h b/drivers/gpu/drm/gma500/psb_irq.h
index 603045b..debb7f1 100644
--- a/drivers/gpu/drm/gma500/psb_irq.h
+++ b/drivers/gpu/drm/gma500/psb_irq.h
@@ -21,8 +21,8 @@
  *
  **************************************************************************/
 
-#ifndef _SYSIRQ_H_
-#define _SYSIRQ_H_
+#ifndef _PSB_IRQ_H_
+#define _PSB_IRQ_H_
 
 #include <drm/drmP.h>
 
@@ -44,4 +44,4 @@
 
 int mdfld_enable_te(struct drm_device *dev, int pipe);
 void mdfld_disable_te(struct drm_device *dev, int pipe);
-#endif /* _SYSIRQ_H_ */
+#endif /* _PSB_IRQ_H_ */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index be88532..e913d32 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -901,7 +901,7 @@
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
 			i915_next_seqno_get, i915_next_seqno_set,
-			"next_seqno :  0x%llx\n");
+			"0x%llx\n");
 
 static int i915_rstdby_delays(struct seq_file *m, void *unused)
 {
@@ -1006,6 +1006,9 @@
 		max_freq = rp_state_cap & 0xff;
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   max_freq * GT_FREQUENCY_MULTIPLIER);
+
+		seq_printf(m, "Max overclocked frequency: %dMHz\n",
+			   dev_priv->rps.hw_max * GT_FREQUENCY_MULTIPLIER);
 	} else {
 		seq_printf(m, "no P-state info available\n");
 	}
@@ -1354,7 +1357,7 @@
 	if (ret)
 		return ret;
 
-	seq_printf(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\n");
+	seq_printf(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
 
 	for (gpu_freq = dev_priv->rps.min_delay;
 	     gpu_freq <= dev_priv->rps.max_delay;
@@ -1363,7 +1366,10 @@
 		sandybridge_pcode_read(dev_priv,
 				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
 				       &ia_freq);
-		seq_printf(m, "%d\t\t%d\n", gpu_freq * GT_FREQUENCY_MULTIPLIER, ia_freq * 100);
+		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
+			   gpu_freq * GT_FREQUENCY_MULTIPLIER,
+			   ((ia_freq >> 0) & 0xff) * 100,
+			   ((ia_freq >> 8) & 0xff) * 100);
 	}
 
 	mutex_unlock(&dev_priv->rps.hw_lock);
@@ -1687,7 +1693,7 @@
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 			i915_wedged_get, i915_wedged_set,
-			"wedged :  %llu\n");
+			"%llu\n");
 
 static int
 i915_ring_stop_get(void *data, u64 *val)
@@ -1841,7 +1847,7 @@
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops,
 			i915_max_freq_get, i915_max_freq_set,
-			"max freq: %llu\n");
+			"%llu\n");
 
 static int
 i915_min_freq_get(void *data, u64 *val)
@@ -1892,7 +1898,7 @@
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
 			i915_min_freq_get, i915_min_freq_set,
-			"min freq: %llu\n");
+			"%llu\n");
 
 static int
 i915_cache_sharing_get(void *data, u64 *val)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4be58e3..3b315ba 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1322,6 +1322,10 @@
 	/* Always safe in the mode setting case. */
 	/* FIXME: do pre/post-mode set stuff in core KMS code */
 	dev->vblank_disable_allowed = 1;
+	if (INTEL_INFO(dev)->num_pipes == 0) {
+		dev_priv->mm.suspended = 0;
+		return 0;
+	}
 
 	ret = intel_fbdev_init(dev);
 	if (ret)
@@ -1514,6 +1518,28 @@
 		goto free_priv;
 	}
 
+	mmio_bar = IS_GEN2(dev) ? 1 : 0;
+	/* Before gen4, the registers and the GTT are behind different BARs.
+	 * However, from gen4 onwards, the registers and the GTT are shared
+	 * in the same BAR, so we want to restrict this ioremap from
+	 * clobbering the GTT which we want ioremap_wc instead. Fortunately,
+	 * the register BAR remains the same size for all the earlier
+	 * generations up to Ironlake.
+	 */
+	if (info->gen < 5)
+		mmio_size = 512*1024;
+	else
+		mmio_size = 2*1024*1024;
+
+	dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, mmio_size);
+	if (!dev_priv->regs) {
+		DRM_ERROR("failed to map registers\n");
+		ret = -EIO;
+		goto put_bridge;
+	}
+
+	intel_early_sanitize_regs(dev);
+
 	ret = i915_gem_gtt_init(dev);
 	if (ret)
 		goto put_bridge;
@@ -1538,28 +1564,6 @@
 	if (IS_BROADWATER(dev) || IS_CRESTLINE(dev))
 		dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(32));
 
-	mmio_bar = IS_GEN2(dev) ? 1 : 0;
-	/* Before gen4, the registers and the GTT are behind different BARs.
-	 * However, from gen4 onwards, the registers and the GTT are shared
-	 * in the same BAR, so we want to restrict this ioremap from
-	 * clobbering the GTT which we want ioremap_wc instead. Fortunately,
-	 * the register BAR remains the same size for all the earlier
-	 * generations up to Ironlake.
-	 */
-	if (info->gen < 5)
-		mmio_size = 512*1024;
-	else
-		mmio_size = 2*1024*1024;
-
-	dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, mmio_size);
-	if (!dev_priv->regs) {
-		DRM_ERROR("failed to map registers\n");
-		ret = -EIO;
-		goto put_gmch;
-	}
-
-	intel_early_sanitize_regs(dev);
-
 	aperture_size = dev_priv->gtt.mappable_end;
 
 	dev_priv->gtt.mappable =
@@ -1634,9 +1638,11 @@
 	if (IS_VALLEYVIEW(dev))
 		dev_priv->num_plane = 2;
 
-	ret = drm_vblank_init(dev, INTEL_INFO(dev)->num_pipes);
-	if (ret)
-		goto out_gem_unload;
+	if (INTEL_INFO(dev)->num_pipes) {
+		ret = drm_vblank_init(dev, INTEL_INFO(dev)->num_pipes);
+		if (ret)
+			goto out_gem_unload;
+	}
 
 	/* Start out suspended */
 	dev_priv->mm.suspended = 1;
@@ -1651,9 +1657,11 @@
 
 	i915_setup_sysfs(dev);
 
-	/* Must be done after probing outputs */
-	intel_opregion_init(dev);
-	acpi_video_register();
+	if (INTEL_INFO(dev)->num_pipes) {
+		/* Must be done after probing outputs */
+		intel_opregion_init(dev);
+		acpi_video_register();
+	}
 
 	if (IS_GEN5(dev))
 		intel_gpu_ips_init(dev_priv);
@@ -1678,10 +1686,9 @@
 		dev_priv->mm.gtt_mtrr = -1;
 	}
 	io_mapping_free(dev_priv->gtt.mappable);
+	dev_priv->gtt.gtt_remove(dev);
 out_rmmap:
 	pci_iounmap(dev->pdev, dev_priv->regs);
-put_gmch:
-	dev_priv->gtt.gtt_remove(dev);
 put_bridge:
 	pci_dev_put(dev_priv->bridge_dev);
 free_priv:
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3b4b9c0..9ebe895 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -140,6 +140,16 @@
 	.subdevice = PCI_ANY_ID,		\
 	.driver_data = (unsigned long) info }
 
+#define INTEL_QUANTA_VGA_DEVICE(info) {		\
+	.class = PCI_BASE_CLASS_DISPLAY << 16,	\
+	.class_mask = 0xff0000,			\
+	.vendor = 0x8086,			\
+	.device = 0x16a,			\
+	.subvendor = 0x152d,			\
+	.subdevice = 0x8990,			\
+	.driver_data = (unsigned long) info }
+
+
 static const struct intel_device_info intel_i830_info = {
 	.gen = 2, .is_mobile = 1, .cursor_needs_physical = 1, .num_pipes = 2,
 	.has_overlay = 1, .overlay_needs_physical = 1,
@@ -272,12 +282,19 @@
 	.is_mobile = 1,
 };
 
+static const struct intel_device_info intel_ivybridge_q_info = {
+	GEN7_FEATURES,
+	.is_ivybridge = 1,
+	.num_pipes = 0, /* legal, last one wins */
+};
+
 static const struct intel_device_info intel_valleyview_m_info = {
 	GEN7_FEATURES,
 	.is_mobile = 1,
 	.num_pipes = 2,
 	.is_valleyview = 1,
 	.display_mmio_offset = VLV_DISPLAY_BASE,
+	.has_llc = 0, /* legal, last one wins */
 };
 
 static const struct intel_device_info intel_valleyview_d_info = {
@@ -285,6 +302,7 @@
 	.num_pipes = 2,
 	.is_valleyview = 1,
 	.display_mmio_offset = VLV_DISPLAY_BASE,
+	.has_llc = 0, /* legal, last one wins */
 };
 
 static const struct intel_device_info intel_haswell_d_info = {
@@ -342,6 +360,7 @@
 	INTEL_VGA_DEVICE(0x0152, &intel_ivybridge_d_info), /* GT1 desktop */
 	INTEL_VGA_DEVICE(0x0162, &intel_ivybridge_d_info), /* GT2 desktop */
 	INTEL_VGA_DEVICE(0x015a, &intel_ivybridge_d_info), /* GT1 server */
+	INTEL_QUANTA_VGA_DEVICE(&intel_ivybridge_q_info), /* Quanta transcode */
 	INTEL_VGA_DEVICE(0x016a, &intel_ivybridge_d_info), /* GT2 server */
 	INTEL_VGA_DEVICE(0x0402, &intel_haswell_d_info), /* GT1 desktop */
 	INTEL_VGA_DEVICE(0x0412, &intel_haswell_d_info), /* GT2 desktop */
@@ -397,6 +416,15 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct pci_dev *pch;
 
+	/* In all current cases, num_pipes is equivalent to the PCH_NOP setting
+	 * (which really amounts to a PCH but no South Display).
+	 */
+	if (INTEL_INFO(dev)->num_pipes == 0) {
+		dev_priv->pch_type = PCH_NOP;
+		dev_priv->num_pch_pll = 0;
+		return;
+	}
+
 	/*
 	 * The reason to probe ISA bridge instead of Dev31:Fun0 is to
 	 * make graphics device passthrough work easy for VMM, that only
@@ -431,11 +459,13 @@
 				dev_priv->num_pch_pll = 0;
 				DRM_DEBUG_KMS("Found LynxPoint PCH\n");
 				WARN_ON(!IS_HASWELL(dev));
+				WARN_ON(IS_ULT(dev));
 			} else if (id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_LPT;
 				dev_priv->num_pch_pll = 0;
 				DRM_DEBUG_KMS("Found LynxPoint LP PCH\n");
 				WARN_ON(!IS_HASWELL(dev));
+				WARN_ON(!IS_ULT(dev));
 			}
 			BUG_ON(dev_priv->num_pch_pll > I915_NUM_PLLS);
 		}
@@ -901,7 +931,11 @@
 			ring->init(ring);
 
 		i915_gem_context_init(dev);
-		i915_gem_init_ppgtt(dev);
+		if (dev_priv->mm.aliasing_ppgtt) {
+			ret = dev_priv->mm.aliasing_ppgtt->enable(dev);
+			if (ret)
+				i915_gem_cleanup_aliasing_ppgtt(dev);
+		}
 
 		/*
 		 * It would make sense to re-init all the other hw state, at
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 44fca0b..d5dcf7f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -195,9 +195,9 @@
 	struct _drm_i915_sarea *sarea_priv;
 };
 #define I915_FENCE_REG_NONE -1
-#define I915_MAX_NUM_FENCES 16
-/* 16 fences + sign bit for FENCE_REG_NONE */
-#define I915_MAX_NUM_FENCE_BITS 5
+#define I915_MAX_NUM_FENCES 32
+/* 32 fences + sign bit for FENCE_REG_NONE */
+#define I915_MAX_NUM_FENCE_BITS 6
 
 struct drm_i915_fence_reg {
 	struct list_head lru_list;
@@ -449,6 +449,7 @@
 			       struct sg_table *st,
 			       unsigned int pg_start,
 			       enum i915_cache_level cache_level);
+	int (*enable)(struct drm_device *dev);
 	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
 };
 
@@ -479,6 +480,7 @@
 	PCH_IBX,	/* Ibexpeak PCH */
 	PCH_CPT,	/* Cougarpoint PCH */
 	PCH_LPT,	/* Lynxpoint PCH */
+	PCH_NOP,
 };
 
 enum intel_sbi_destination {
@@ -666,6 +668,7 @@
 	u8 cur_delay;
 	u8 min_delay;
 	u8 max_delay;
+	u8 hw_max;
 
 	struct delayed_work delayed_resume_work;
 
@@ -929,6 +932,16 @@
 
 	struct work_struct hotplug_work;
 	bool enable_hotplug_processing;
+	struct {
+		unsigned long hpd_last_jiffies;
+		int hpd_cnt;
+		enum {
+			HPD_ENABLED = 0,
+			HPD_DISABLED = 1,
+			HPD_MARK_DISABLED = 2
+		} hpd_mark;
+	} hpd_stats[HPD_NUM_PINS];
+	struct timer_list hotplug_reenable_timer;
 
 	int num_pch_pll;
 	int num_plane;
@@ -963,6 +976,7 @@
 	unsigned int int_crt_support:1;
 	unsigned int lvds_use_ssc:1;
 	unsigned int display_clock_mode:1;
+	unsigned int fdi_rx_polarity_inverted:1;
 	int lvds_ssc_freq;
 	unsigned int bios_lvds_val; /* initial [PCH_]LVDS reg val in VBIOS */
 	struct {
@@ -1373,6 +1387,7 @@
 #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT)
 #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
 #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
+#define HAS_PCH_NOP(dev) (INTEL_PCH_TYPE(dev) == PCH_NOP)
 #define HAS_PCH_SPLIT(dev) (INTEL_PCH_TYPE(dev) != PCH_NONE)
 
 #define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake)
@@ -1640,7 +1655,6 @@
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 void i915_gem_l3_remap(struct drm_device *dev);
 void i915_gem_init_swizzling(struct drm_device *dev);
-void i915_gem_init_ppgtt(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_idle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 911bd40..6be940e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2683,17 +2683,35 @@
 	return fence - dev_priv->fence_regs;
 }
 
+static void i915_gem_write_fence__ipi(void *data)
+{
+	wbinvd();
+}
+
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 					 struct drm_i915_fence_reg *fence,
 					 bool enable)
 {
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	int reg = fence_number(dev_priv, fence);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int fence_reg = fence_number(dev_priv, fence);
 
-	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+	/* In order to fully serialize access to the fenced region and
+	 * the update to the fence register we need to take extreme
+	 * measures on SNB+. In theory, the write to the fence register
+	 * flushes all memory transactions before, and coupled with the
+	 * mb() placed around the register write we serialise all memory
+	 * operations with respect to the changes in the tiler. Yet, on
+	 * SNB+ we need to take a step further and emit an explicit wbinvd()
+	 * on each processor in order to manually flush all memory
+	 * transactions before updating the fence register.
+	 */
+	if (HAS_LLC(obj->base.dev))
+		on_each_cpu(i915_gem_write_fence__ipi, NULL, 1);
+	i915_gem_write_fence(dev, fence_reg, enable ? obj : NULL);
 
 	if (enable) {
-		obj->fence_reg = reg;
+		obj->fence_reg = fence_reg;
 		fence->obj = obj;
 		list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
 	} else {
@@ -3992,6 +4010,12 @@
 	if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
 		I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
 
+	if (HAS_PCH_NOP(dev)) {
+		u32 temp = I915_READ(GEN7_MSG_CTL);
+		temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
+		I915_WRITE(GEN7_MSG_CTL, temp);
+	}
+
 	i915_gem_l3_remap(dev);
 
 	i915_gem_init_swizzling(dev);
@@ -4005,7 +4029,13 @@
 	 * contexts before PPGTT.
 	 */
 	i915_gem_context_init(dev);
-	i915_gem_init_ppgtt(dev);
+	if (dev_priv->mm.aliasing_ppgtt) {
+		ret = dev_priv->mm.aliasing_ppgtt->enable(dev);
+		if (ret) {
+			i915_gem_cleanup_aliasing_ppgtt(dev);
+			DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n");
+		}
+	}
 
 	return 0;
 }
@@ -4160,7 +4190,9 @@
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		dev_priv->fence_reg_start = 3;
 
-	if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
+		dev_priv->num_fence_regs = 32;
+	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 		dev_priv->num_fence_regs = 16;
 	else
 		dev_priv->num_fence_regs = 8;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 94d873a..a1e8ecb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -152,6 +152,13 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (INTEL_INFO(dev)->gen >= 7) {
+		ret = i915_gem_object_set_cache_level(ctx->obj,
+						      I915_CACHE_LLC_MLC);
+		if (ret)
+			goto err_out;
+	}
+
 	/* The ring associated with the context object is handled by the normal
 	 * object tracking code. We give an initial ring value simple to pass an
 	 * assertion in the context switch code.
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 24a23b3..50df194 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -28,7 +28,7 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-typedef uint32_t gtt_pte_t;
+typedef uint32_t gen6_gtt_pte_t;
 
 /* PPGTT stuff */
 #define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
@@ -44,11 +44,11 @@
 #define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
 
-static inline gtt_pte_t gen6_pte_encode(struct drm_device *dev,
-					dma_addr_t addr,
-					enum i915_cache_level level)
+static inline gen6_gtt_pte_t gen6_pte_encode(struct drm_device *dev,
+					     dma_addr_t addr,
+					     enum i915_cache_level level)
 {
-	gtt_pte_t pte = GEN6_PTE_VALID;
+	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
 
 	switch (level) {
@@ -72,17 +72,84 @@
 		BUG();
 	}
 
-
 	return pte;
 }
 
+static int gen6_ppgtt_enable(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	uint32_t pd_offset;
+	struct intel_ring_buffer *ring;
+	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+	gen6_gtt_pte_t __iomem *pd_addr;
+	uint32_t pd_entry;
+	int i;
+
+	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
+		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
+	for (i = 0; i < ppgtt->num_pd_entries; i++) {
+		dma_addr_t pt_addr;
+
+		pt_addr = ppgtt->pt_dma_addr[i];
+		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
+		pd_entry |= GEN6_PDE_VALID;
+
+		writel(pd_entry, pd_addr + i);
+	}
+	readl(pd_addr);
+
+	pd_offset = ppgtt->pd_offset;
+	pd_offset /= 64; /* in cachelines, */
+	pd_offset <<= 16;
+
+	if (INTEL_INFO(dev)->gen == 6) {
+		uint32_t ecochk, gab_ctl, ecobits;
+
+		ecobits = I915_READ(GAC_ECO_BITS);
+		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
+					 ECOBITS_PPGTT_CACHE64B);
+
+		gab_ctl = I915_READ(GAB_CTL);
+		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
+
+		ecochk = I915_READ(GAM_ECOCHK);
+		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
+				       ECOCHK_PPGTT_CACHE64B);
+		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+	} else if (INTEL_INFO(dev)->gen >= 7) {
+		uint32_t ecochk, ecobits;
+
+		ecobits = I915_READ(GAC_ECO_BITS);
+		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
+
+		ecochk = I915_READ(GAM_ECOCHK);
+		if (IS_HASWELL(dev)) {
+			ecochk |= ECOCHK_PPGTT_WB_HSW;
+		} else {
+			ecochk |= ECOCHK_PPGTT_LLC_IVB;
+			ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
+		}
+		I915_WRITE(GAM_ECOCHK, ecochk);
+		/* GFX_MODE is per-ring on gen7+ */
+	}
+
+	for_each_ring(ring, dev_priv, i) {
+		if (INTEL_INFO(dev)->gen >= 7)
+			I915_WRITE(RING_MODE_GEN7(ring),
+				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+
+		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
+		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
+	}
+	return 0;
+}
+
 /* PPGTT support for Sandybdrige/Gen6 and later */
 static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 				   unsigned first_entry,
 				   unsigned num_entries)
 {
-	gtt_pte_t *pt_vaddr;
-	gtt_pte_t scratch_pte;
+	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
@@ -114,7 +181,7 @@
 				      unsigned first_entry,
 				      enum i915_cache_level cache_level)
 {
-	gtt_pte_t *pt_vaddr;
+	gen6_gtt_pte_t *pt_vaddr;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	struct sg_page_iter sg_iter;
@@ -170,6 +237,7 @@
 		gtt_total_entries(dev_priv->gtt) - I915_PPGTT_PD_ENTRIES;
 
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
+	ppgtt->enable = gen6_ppgtt_enable;
 	ppgtt->clear_range = gen6_ppgtt_clear_range;
 	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->cleanup = gen6_ppgtt_cleanup;
@@ -203,12 +271,10 @@
 		ppgtt->pt_dma_addr[i] = pt_addr;
 	}
 
-	ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;
-
 	ppgtt->clear_range(ppgtt, 0,
 			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
 
-	ppgtt->pd_offset = (first_pd_entry_in_global_pt)*sizeof(gtt_pte_t);
+	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
 
 	return 0;
 
@@ -240,8 +306,13 @@
 		return -ENOMEM;
 
 	ppgtt->dev = dev;
+	ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;
 
-	ret = gen6_ppgtt_init(ppgtt);
+	if (INTEL_INFO(dev)->gen < 8)
+		ret = gen6_ppgtt_init(ppgtt);
+	else
+		BUG();
+
 	if (ret)
 		kfree(ppgtt);
 	else
@@ -259,6 +330,7 @@
 		return;
 
 	ppgtt->cleanup(ppgtt);
+	dev_priv->mm.aliasing_ppgtt = NULL;
 }
 
 void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
@@ -278,64 +350,6 @@
 			   obj->base.size >> PAGE_SHIFT);
 }
 
-void i915_gem_init_ppgtt(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t pd_offset;
-	struct intel_ring_buffer *ring;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	gtt_pte_t __iomem *pd_addr;
-	uint32_t pd_entry;
-	int i;
-
-	if (!dev_priv->mm.aliasing_ppgtt)
-		return;
-
-
-	pd_addr = (gtt_pte_t __iomem*)dev_priv->gtt.gsm + ppgtt->pd_offset/sizeof(gtt_pte_t);
-	for (i = 0; i < ppgtt->num_pd_entries; i++) {
-		dma_addr_t pt_addr;
-
-		pt_addr = ppgtt->pt_dma_addr[i];
-		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
-		pd_entry |= GEN6_PDE_VALID;
-
-		writel(pd_entry, pd_addr + i);
-	}
-	readl(pd_addr);
-
-	pd_offset = ppgtt->pd_offset;
-	pd_offset /= 64; /* in cachelines, */
-	pd_offset <<= 16;
-
-	if (INTEL_INFO(dev)->gen == 6) {
-		uint32_t ecochk, gab_ctl, ecobits;
-
-		ecobits = I915_READ(GAC_ECO_BITS);
-		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
-
-		gab_ctl = I915_READ(GAB_CTL);
-		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
-
-		ecochk = I915_READ(GAM_ECOCHK);
-		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
-				       ECOCHK_PPGTT_CACHE64B);
-		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-	} else if (INTEL_INFO(dev)->gen >= 7) {
-		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
-		/* GFX_MODE is per-ring on gen7+ */
-	}
-
-	for_each_ring(ring, dev_priv, i) {
-		if (INTEL_INFO(dev)->gen >= 7)
-			I915_WRITE(RING_MODE_GEN7(ring),
-				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-
-		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
-		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
-	}
-}
-
 extern int intel_iommu_gfx_mapped;
 /* Certain Gen5 chipsets require require idling the GPU before
  * unmapping anything from the GTT when VT-d is enabled.
@@ -416,8 +430,8 @@
 				     enum i915_cache_level level)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	gtt_pte_t __iomem *gtt_entries =
-		(gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
+	gen6_gtt_pte_t __iomem *gtt_entries =
+		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int i = 0;
 	struct sg_page_iter sg_iter;
 	dma_addr_t addr;
@@ -451,8 +465,8 @@
 				  unsigned int num_entries)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	gtt_pte_t scratch_pte;
-	gtt_pte_t __iomem *gtt_base = (gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
+	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
+		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
 	int i;
 
@@ -626,9 +640,12 @@
 
 	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
 		int ret;
-		/* PPGTT pdes are stolen from global gtt ptes, so shrink the
-		 * aperture accordingly when using aliasing ppgtt. */
-		gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
+
+		if (INTEL_INFO(dev)->gen <= 7) {
+			/* PPGTT pdes are stolen from global gtt ptes, so shrink the
+			 * aperture accordingly when using aliasing ppgtt. */
+			gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
+		}
 
 		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
 
@@ -736,10 +753,12 @@
 	else
 		*stolen = gen6_get_stolen_size(snb_gmch_ctl);
 
-	*gtt_total = (gtt_size / sizeof(gtt_pte_t)) << PAGE_SHIFT;
+	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
 
-	/* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */
-	gtt_bus_addr = pci_resource_start(dev->pdev, 0) + (2<<20);
+	/* For Modern GENs the PTEs and register space are split in the BAR */
+	gtt_bus_addr = pci_resource_start(dev->pdev, 0) +
+		(pci_resource_len(dev->pdev, 0) / 2);
+
 	dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size);
 	if (!dev_priv->gtt.gsm) {
 		DRM_ERROR("Failed to map the gtt page table\n");
@@ -796,7 +815,6 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_gtt *gtt = &dev_priv->gtt;
-	unsigned long gtt_size;
 	int ret;
 
 	if (INTEL_INFO(dev)->gen <= 5) {
@@ -814,8 +832,6 @@
 	if (ret)
 		return ret;
 
-	gtt_size = (dev_priv->gtt.total >> PAGE_SHIFT) * sizeof(gtt_pte_t);
-
 	/* GMADR is the PCI mmio aperture into the global GTT. */
 	DRM_INFO("Memory usable by graphics device = %zdM\n",
 		 dev_priv->gtt.total >> 20);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index c807eb9..537545b 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -217,9 +217,12 @@
 		tile_width = 512;
 
 	/* check maximum stride & object size */
-	if (INTEL_INFO(dev)->gen >= 4) {
-		/* i965 stores the end address of the gtt mapping in the fence
-		 * reg, so dont bother to check the size */
+	/* i965+ stores the end address of the gtt mapping in the fence
+	 * reg, so dont bother to check the size */
+	if (INTEL_INFO(dev)->gen >= 7) {
+		if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL)
+			return false;
+	} else if (INTEL_INFO(dev)->gen >= 4) {
 		if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
 			return false;
 	} else {
@@ -235,6 +238,9 @@
 		}
 	}
 
+	if (stride < tile_width)
+		return false;
+
 	/* 965+ just needs multiples of tile width */
 	if (INTEL_INFO(dev)->gen >= 4) {
 		if (stride & (tile_width - 1))
@@ -243,9 +249,6 @@
 	}
 
 	/* Pre-965 needs power of two tile widths */
-	if (stride < tile_width)
-		return false;
-
 	if (stride & (stride - 1))
 		return false;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4c5bdd0..0aa2ef0 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -88,7 +88,8 @@
 	[HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS
 };
 
-
+static void ibx_hpd_irq_setup(struct drm_device *dev);
+static void i915_hpd_irq_setup(struct drm_device *dev);
 
 /* For display hotplug interrupt */
 static void
@@ -336,13 +337,19 @@
 /*
  * Handle hotplug events outside the interrupt handler proper.
  */
+#define I915_REENABLE_HOTPLUG_DELAY (2*60*1000)
+
 static void i915_hotplug_work_func(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
 						    hotplug_work);
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct intel_encoder *encoder;
+	struct intel_connector *intel_connector;
+	struct intel_encoder *intel_encoder;
+	struct drm_connector *connector;
+	unsigned long irqflags;
+	bool hpd_disabled = false;
 
 	/* HPD irq before everything is fully set up. */
 	if (!dev_priv->enable_hotplug_processing)
@@ -351,9 +358,36 @@
 	mutex_lock(&mode_config->mutex);
 	DRM_DEBUG_KMS("running encoder hotplug functions\n");
 
-	list_for_each_entry(encoder, &mode_config->encoder_list, base.head)
-		if (encoder->hot_plug)
-			encoder->hot_plug(encoder);
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	list_for_each_entry(connector, &mode_config->connector_list, head) {
+		intel_connector = to_intel_connector(connector);
+		intel_encoder = intel_connector->encoder;
+		if (intel_encoder->hpd_pin > HPD_NONE &&
+		    dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_MARK_DISABLED &&
+		    connector->polled == DRM_CONNECTOR_POLL_HPD) {
+			DRM_INFO("HPD interrupt storm detected on connector %s: "
+				 "switching from hotplug detection to polling\n",
+				drm_get_connector_name(connector));
+			dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark = HPD_DISABLED;
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT
+				| DRM_CONNECTOR_POLL_DISCONNECT;
+			hpd_disabled = true;
+		}
+	}
+	 /* if there were no outputs to poll, poll was disabled,
+	  * therefore make sure it's enabled when disabling HPD on
+	  * some connectors */
+	if (hpd_disabled) {
+		drm_kms_helper_poll_enable(dev);
+		mod_timer(&dev_priv->hotplug_reenable_timer,
+			  jiffies + msecs_to_jiffies(I915_REENABLE_HOTPLUG_DELAY));
+	}
+
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+
+	list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head)
+		if (intel_encoder->hot_plug)
+			intel_encoder->hot_plug(intel_encoder);
 
 	mutex_unlock(&mode_config->mutex);
 
@@ -582,6 +616,45 @@
 	queue_work(dev_priv->wq, &dev_priv->rps.work);
 }
 
+#define HPD_STORM_DETECT_PERIOD 1000
+#define HPD_STORM_THRESHOLD 5
+
+static inline bool hotplug_irq_storm_detect(struct drm_device *dev,
+					    u32 hotplug_trigger,
+					    const u32 *hpd)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	unsigned long irqflags;
+	int i;
+	bool ret = false;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+
+	for (i = 1; i < HPD_NUM_PINS; i++) {
+
+		if (!(hpd[i] & hotplug_trigger) ||
+		    dev_priv->hpd_stats[i].hpd_mark != HPD_ENABLED)
+			continue;
+
+		if (!time_in_range(jiffies, dev_priv->hpd_stats[i].hpd_last_jiffies,
+				   dev_priv->hpd_stats[i].hpd_last_jiffies
+				   + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD))) {
+			dev_priv->hpd_stats[i].hpd_last_jiffies = jiffies;
+			dev_priv->hpd_stats[i].hpd_cnt = 0;
+		} else if (dev_priv->hpd_stats[i].hpd_cnt > HPD_STORM_THRESHOLD) {
+			dev_priv->hpd_stats[i].hpd_mark = HPD_MARK_DISABLED;
+			DRM_DEBUG_KMS("HPD interrupt storm detected on PIN %d\n", i);
+			ret = true;
+		} else {
+			dev_priv->hpd_stats[i].hpd_cnt++;
+		}
+	}
+
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+
+	return ret;
+}
+
 static void gmbus_irq_handler(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -650,13 +723,16 @@
 		/* Consume port.  Then clear IIR or we'll miss events */
 		if (iir & I915_DISPLAY_PORT_INTERRUPT) {
 			u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT);
+			u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
 
 			DRM_DEBUG_DRIVER("hotplug event received, stat 0x%08x\n",
 					 hotplug_status);
-			if (hotplug_status & HOTPLUG_INT_STATUS_I915)
+			if (hotplug_trigger) {
+				if (hotplug_irq_storm_detect(dev, hotplug_trigger, hpd_status_i915))
+					i915_hpd_irq_setup(dev);
 				queue_work(dev_priv->wq,
 					   &dev_priv->hotplug_work);
-
+			}
 			I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status);
 			I915_READ(PORT_HOTPLUG_STAT);
 		}
@@ -680,10 +756,13 @@
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	int pipe;
+	u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK;
 
-	if (pch_iir & SDE_HOTPLUG_MASK)
+	if (hotplug_trigger) {
+		if (hotplug_irq_storm_detect(dev, hotplug_trigger, hpd_ibx))
+			ibx_hpd_irq_setup(dev);
 		queue_work(dev_priv->wq, &dev_priv->hotplug_work);
-
+	}
 	if (pch_iir & SDE_AUDIO_POWER_MASK)
 		DRM_DEBUG_DRIVER("PCH audio power change on port %d\n",
 				 (pch_iir & SDE_AUDIO_POWER_MASK) >>
@@ -726,10 +805,13 @@
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	int pipe;
+	u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT;
 
-	if (pch_iir & SDE_HOTPLUG_MASK_CPT)
+	if (hotplug_trigger) {
+		if (hotplug_irq_storm_detect(dev, hotplug_trigger, hpd_cpt))
+			ibx_hpd_irq_setup(dev);
 		queue_work(dev_priv->wq, &dev_priv->hotplug_work);
-
+	}
 	if (pch_iir & SDE_AUDIO_POWER_MASK_CPT)
 		DRM_DEBUG_DRIVER("PCH audio power change on port %d\n",
 				 (pch_iir & SDE_AUDIO_POWER_MASK_CPT) >>
@@ -758,7 +840,7 @@
 {
 	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier;
+	u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier = 0;
 	irqreturn_t ret = IRQ_NONE;
 	int i;
 
@@ -773,9 +855,11 @@
 	 * able to process them after we restore SDEIER (as soon as we restore
 	 * it, we'll get an interrupt if SDEIIR still has something to process
 	 * due to its back queue). */
-	sde_ier = I915_READ(SDEIER);
-	I915_WRITE(SDEIER, 0);
-	POSTING_READ(SDEIER);
+	if (!HAS_PCH_NOP(dev)) {
+		sde_ier = I915_READ(SDEIER);
+		I915_WRITE(SDEIER, 0);
+		POSTING_READ(SDEIER);
+	}
 
 	gt_iir = I915_READ(GTIIR);
 	if (gt_iir) {
@@ -802,7 +886,7 @@
 		}
 
 		/* check event from PCH */
-		if (de_iir & DE_PCH_EVENT_IVB) {
+		if (!HAS_PCH_NOP(dev) && (de_iir & DE_PCH_EVENT_IVB)) {
 			u32 pch_iir = I915_READ(SDEIIR);
 
 			cpt_irq_handler(dev, pch_iir);
@@ -825,8 +909,10 @@
 
 	I915_WRITE(DEIER, de_ier);
 	POSTING_READ(DEIER);
-	I915_WRITE(SDEIER, sde_ier);
-	POSTING_READ(SDEIER);
+	if (!HAS_PCH_NOP(dev)) {
+		I915_WRITE(SDEIER, sde_ier);
+		POSTING_READ(SDEIER);
+	}
 
 	return ret;
 }
@@ -1209,7 +1295,7 @@
 	switch (INTEL_INFO(dev)->gen) {
 	case 7:
 	case 6:
-		for (i = 0; i < 16; i++)
+		for (i = 0; i < dev_priv->num_fence_regs; i++)
 			error->fence[i] = I915_READ64(FENCE_REG_SANDYBRIDGE_0 + (i * 8));
 		break;
 	case 5:
@@ -2027,6 +2113,9 @@
 	I915_WRITE(GTIER, 0x0);
 	POSTING_READ(GTIER);
 
+	if (HAS_PCH_NOP(dev))
+		return;
+
 	/* south display irq */
 	I915_WRITE(SDEIMR, 0xffffffff);
 	/*
@@ -2080,11 +2169,15 @@
 	u32 hotplug;
 
 	if (HAS_PCH_IBX(dev)) {
+		mask &= ~SDE_HOTPLUG_MASK;
 		list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head)
-			mask |= hpd_ibx[intel_encoder->hpd_pin];
+			if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED)
+				mask |= hpd_ibx[intel_encoder->hpd_pin];
 	} else {
+		mask &= ~SDE_HOTPLUG_MASK_CPT;
 		list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head)
-			mask |= hpd_cpt[intel_encoder->hpd_pin];
+			if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED)
+				mask |= hpd_cpt[intel_encoder->hpd_pin];
 	}
 
 	I915_WRITE(SDEIMR, ~mask);
@@ -2112,6 +2205,10 @@
 		mask = SDE_GMBUS | SDE_AUX_MASK;
 	else
 		mask = SDE_GMBUS_CPT | SDE_AUX_MASK_CPT;
+
+	if (HAS_PCH_NOP(dev))
+		return;
+
 	I915_WRITE(SDEIIR, I915_READ(SDEIIR));
 	I915_WRITE(SDEIMR, ~mask);
 }
@@ -2275,6 +2372,8 @@
 	if (!dev_priv)
 		return;
 
+	del_timer_sync(&dev_priv->hotplug_reenable_timer);
+
 	for_each_pipe(pipe)
 		I915_WRITE(PIPESTAT(pipe), 0xffff);
 
@@ -2296,6 +2395,8 @@
 	if (!dev_priv)
 		return;
 
+	del_timer_sync(&dev_priv->hotplug_reenable_timer);
+
 	I915_WRITE(HWSTAM, 0xffffffff);
 
 	I915_WRITE(DEIMR, 0xffffffff);
@@ -2306,6 +2407,9 @@
 	I915_WRITE(GTIER, 0x0);
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
 
+	if (HAS_PCH_NOP(dev))
+		return;
+
 	I915_WRITE(SDEIMR, 0xffffffff);
 	I915_WRITE(SDEIER, 0x0);
 	I915_WRITE(SDEIIR, I915_READ(SDEIIR));
@@ -2607,13 +2711,16 @@
 		if ((I915_HAS_HOTPLUG(dev)) &&
 		    (iir & I915_DISPLAY_PORT_INTERRUPT)) {
 			u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT);
+			u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915;
 
 			DRM_DEBUG_DRIVER("hotplug event received, stat 0x%08x\n",
 				  hotplug_status);
-			if (hotplug_status & HOTPLUG_INT_STATUS_I915)
+			if (hotplug_trigger) {
+				if (hotplug_irq_storm_detect(dev, hotplug_trigger, hpd_status_i915))
+					i915_hpd_irq_setup(dev);
 				queue_work(dev_priv->wq,
 					   &dev_priv->hotplug_work);
-
+			}
 			I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status);
 			POSTING_READ(PORT_HOTPLUG_STAT);
 		}
@@ -2669,6 +2776,8 @@
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	int pipe;
 
+	del_timer_sync(&dev_priv->hotplug_reenable_timer);
+
 	if (I915_HAS_HOTPLUG(dev)) {
 		I915_WRITE(PORT_HOTPLUG_EN, 0);
 		I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
@@ -2760,7 +2869,7 @@
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct intel_encoder *encoder;
+	struct intel_encoder *intel_encoder;
 	u32 hotplug_en;
 
 	if (I915_HAS_HOTPLUG(dev)) {
@@ -2768,8 +2877,9 @@
 		hotplug_en &= ~HOTPLUG_INT_EN_MASK;
 		/* Note HDMI and DP share hotplug bits */
 		/* enable bits are the same for all generations */
-		list_for_each_entry(encoder, &mode_config->encoder_list, base.head)
-			hotplug_en |= hpd_mask_i915[encoder->hpd_pin];
+		list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head)
+			if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED)
+				hotplug_en |= hpd_mask_i915[intel_encoder->hpd_pin];
 		/* Programming the CRT detection parameters tends
 		   to generate a spurious hotplug event about three
 		   seconds later.  So just do it once.
@@ -2840,15 +2950,19 @@
 		/* Consume port.  Then clear IIR or we'll miss events */
 		if (iir & I915_DISPLAY_PORT_INTERRUPT) {
 			u32 hotplug_status = I915_READ(PORT_HOTPLUG_STAT);
+			u32 hotplug_trigger = hotplug_status & (IS_G4X(dev) ?
+								  HOTPLUG_INT_STATUS_G4X :
+								  HOTPLUG_INT_STATUS_I965);
 
 			DRM_DEBUG_DRIVER("hotplug event received, stat 0x%08x\n",
 				  hotplug_status);
-			if (hotplug_status & (IS_G4X(dev) ?
-					      HOTPLUG_INT_STATUS_G4X :
-					      HOTPLUG_INT_STATUS_I965))
+			if (hotplug_trigger) {
+				if (hotplug_irq_storm_detect(dev, hotplug_trigger,
+							    IS_G4X(dev) ? hpd_status_gen4 : hpd_status_i965))
+					i915_hpd_irq_setup(dev);
 				queue_work(dev_priv->wq,
 					   &dev_priv->hotplug_work);
-
+			}
 			I915_WRITE(PORT_HOTPLUG_STAT, hotplug_status);
 			I915_READ(PORT_HOTPLUG_STAT);
 		}
@@ -2908,6 +3022,8 @@
 	if (!dev_priv)
 		return;
 
+	del_timer_sync(&dev_priv->hotplug_reenable_timer);
+
 	I915_WRITE(PORT_HOTPLUG_EN, 0);
 	I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
 
@@ -2923,6 +3039,41 @@
 	I915_WRITE(IIR, I915_READ(IIR));
 }
 
+static void i915_reenable_hotplug_timer_func(unsigned long data)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *)data;
+	struct drm_device *dev = dev_priv->dev;
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	unsigned long irqflags;
+	int i;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	for (i = (HPD_NONE + 1); i < HPD_NUM_PINS; i++) {
+		struct drm_connector *connector;
+
+		if (dev_priv->hpd_stats[i].hpd_mark != HPD_DISABLED)
+			continue;
+
+		dev_priv->hpd_stats[i].hpd_mark = HPD_ENABLED;
+
+		list_for_each_entry(connector, &mode_config->connector_list, head) {
+			struct intel_connector *intel_connector = to_intel_connector(connector);
+
+			if (intel_connector->encoder->hpd_pin == i) {
+				if (connector->polled != intel_connector->polled)
+					DRM_DEBUG_DRIVER("Reenabling HPD on connector %s\n",
+							 drm_get_connector_name(connector));
+				connector->polled = intel_connector->polled;
+				if (!connector->polled)
+					connector->polled = DRM_CONNECTOR_POLL_HPD;
+			}
+		}
+	}
+	if (dev_priv->display.hpd_irq_setup)
+		dev_priv->display.hpd_irq_setup(dev);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
+
 void intel_irq_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2935,6 +3086,8 @@
 	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
 		    i915_hangcheck_elapsed,
 		    (unsigned long) dev);
+	setup_timer(&dev_priv->hotplug_reenable_timer, i915_reenable_hotplug_timer_func,
+		    (unsigned long) dev_priv);
 
 	pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
 
@@ -3003,7 +3156,20 @@
 void intel_hpd_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct drm_connector *connector;
+	int i;
 
+	for (i = 1; i < HPD_NUM_PINS; i++) {
+		dev_priv->hpd_stats[i].hpd_cnt = 0;
+		dev_priv->hpd_stats[i].hpd_mark = HPD_ENABLED;
+	}
+	list_for_each_entry(connector, &mode_config->connector_list, head) {
+		struct intel_connector *intel_connector = to_intel_connector(connector);
+		connector->polled = intel_connector->polled;
+		if (!connector->polled && I915_HAS_HOTPLUG(dev) && intel_connector->encoder->hpd_pin > HPD_NONE)
+			connector->polled = DRM_CONNECTOR_POLL_HPD;
+	}
 	if (dev_priv->display.hpd_irq_setup)
 		dev_priv->display.hpd_irq_setup(dev);
 }
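
The hotplug rework above follows a storm-detection pattern: each HPD pin keeps a per-window interrupt count, and once the count trips a threshold the pin is marked HPD_DISABLED, its connector falls back to polling, and a timer (i915_reenable_hotplug_timer_func) later restores interrupt-driven detection. A minimal standalone sketch of that debounce idea, with illustrative names and thresholds rather than the driver's actual ones:

/* Sketch of the HPD storm-detection pattern used above: count interrupts
 * per pin inside a short window; past a threshold, mark the pin disabled
 * so a timer can re-enable it later.  Names and constants illustrative. */
#include <stdio.h>

#define HPD_STORM_PERIOD    1000 /* window length, ms */
#define HPD_STORM_THRESHOLD 5

enum hpd_mark { HPD_ENABLED, HPD_DISABLED };

struct hpd_stat {
	long window_start; /* start of the current window, in ms */
	int cnt;
	enum hpd_mark mark;
};

/* Returns 1 when the pin just crossed the storm threshold. */
static int hpd_irq_storm_detect(struct hpd_stat *s, long now_ms)
{
	if (now_ms - s->window_start > HPD_STORM_PERIOD) {
		/* window expired: restart the count */
		s->window_start = now_ms;
		s->cnt = 0;
		return 0;
	}
	if (++s->cnt > HPD_STORM_THRESHOLD) {
		s->mark = HPD_DISABLED; /* caller reprograms the IRQ mask */
		return 1;
	}
	return 0;
}

int main(void)
{
	struct hpd_stat pin = { .window_start = 0, .cnt = 0, .mark = HPD_ENABLED };
	long t;

	/* ten interrupts 10ms apart: a storm */
	for (t = 0; t < 100; t += 10)
		if (hpd_irq_storm_detect(&pin, t))
			printf("storm detected at t=%ldms, pin disabled\n", t);
	return 0;
}

In the patch itself the same decision feeds i915_hpd_irq_setup(), which rebuilds the interrupt mask skipping any pin not marked HPD_ENABLED.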
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 058686c..31de7e4 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -125,8 +125,14 @@
 #define   HSW_ECOCHK_ARB_PRIO_SOL	(1<<6)
 #define   ECOCHK_PPGTT_CACHE64B		(0x3<<3)
 #define   ECOCHK_PPGTT_CACHE4B		(0x0<<3)
+#define   ECOCHK_PPGTT_GFDT_IVB		(0x1<<4)
+#define   ECOCHK_PPGTT_LLC_IVB		(0x1<<3)
+#define   ECOCHK_PPGTT_UC_HSW		(0x1<<3)
+#define   ECOCHK_PPGTT_WT_HSW		(0x2<<3)
+#define   ECOCHK_PPGTT_WB_HSW		(0x3<<3)
 
 #define GAC_ECO_BITS			0x14090
+#define   ECOBITS_SNB_BIT		(1<<13)
 #define   ECOBITS_PPGTT_CACHE64B	(3<<8)
 #define   ECOBITS_PPGTT_CACHE4B		(0<<8)
 
@@ -424,6 +430,7 @@
 
 #define FENCE_REG_SANDYBRIDGE_0		0x100000
 #define   SANDYBRIDGE_FENCE_PITCH_SHIFT	32
+#define   GEN7_FENCE_MAX_PITCH_VAL	0x0800
 
 /* control register for cpu gtt access */
 #define TILECTL				0x101000
@@ -1203,6 +1210,9 @@
 
 #define MCHBAR_MIRROR_BASE_SNB	0x140000
 
+/* Memory controller frequency in MCHBAR for Haswell (possible SNB+) */
+#define DCLK 0x5e04
+
 /** 915-945 and GM965 MCH register controlling DRAM channel access */
 #define DCC			0x10200
 #define DCC_ADDRESSING_MODE_SINGLE_CHANNEL		(0 << 0)
@@ -3568,6 +3578,9 @@
 #define DISP_ARB_CTL	0x45000
 #define  DISP_TILE_SURFACE_SWIZZLING	(1<<13)
 #define  DISP_FBC_WM_DIS		(1<<15)
+#define GEN7_MSG_CTL	0x45010
+#define  WAIT_FOR_PCH_RESET_ACK		(1<<1)
+#define  WAIT_FOR_PCH_FLR_ACK		(1<<0)
 
 /* GEN7 chicken */
 #define GEN7_COMMON_SLICE_CHICKEN1		0x7010
@@ -3946,8 +3959,11 @@
 #define _TRANSA_CHICKEN2	 0xf0064
 #define _TRANSB_CHICKEN2	 0xf1064
 #define TRANS_CHICKEN2(pipe) _PIPE(pipe, _TRANSA_CHICKEN2, _TRANSB_CHICKEN2)
-#define  TRANS_CHICKEN2_TIMING_OVERRIDE		(1<<31)
-
+#define  TRANS_CHICKEN2_TIMING_OVERRIDE			(1<<31)
+#define  TRANS_CHICKEN2_FDI_POLARITY_REVERSED		(1<<29)
+#define  TRANS_CHICKEN2_FRAME_START_DELAY_MASK		(3<<27)
+#define  TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER	(1<<26)
+#define  TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH	(1<<25)
 
 #define SOUTH_CHICKEN1		0xc2000
 #define  FDIA_PHASE_SYNC_SHIFT_OVR	19
@@ -4380,6 +4396,7 @@
 #define   GEN6_DECODE_RC6_VID(vids)		(((vids) * 5) + 245)
 #define GEN6_PCODE_DATA				0x138128
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
+#define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT	16
 
 #define VLV_IOSF_DOORBELL_REQ			0x182100
 #define   IOSF_DEVFN_SHIFT			24
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index a3a3e22..d5e1890 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -239,7 +239,7 @@
 	struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
 	struct drm_device *dev = minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 val, rp_state_cap, hw_max, hw_min;
+	u32 val, rp_state_cap, hw_max, hw_min, non_oc_max;
 	ssize_t ret;
 
 	ret = kstrtou32(buf, 0, &val);
@@ -251,7 +251,8 @@
 	mutex_lock(&dev_priv->rps.hw_lock);
 
 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-	hw_max = (rp_state_cap & 0xff);
+	hw_max = dev_priv->rps.hw_max;
+	non_oc_max = (rp_state_cap & 0xff);
 	hw_min = ((rp_state_cap & 0xff0000) >> 16);
 
 	if (val < hw_min || val > hw_max || val < dev_priv->rps.min_delay) {
@@ -259,6 +260,10 @@
 		return -EINVAL;
 	}
 
+	if (val > non_oc_max)
+		DRM_DEBUG("User requested overclocking to %d\n",
+			  val * GT_FREQUENCY_MULTIPLIER);
+
 	if (dev_priv->rps.cur_delay > val)
 		gen6_set_rps(dev_priv->dev, val);
 
@@ -302,7 +307,7 @@
 	mutex_lock(&dev_priv->rps.hw_lock);
 
 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-	hw_max = (rp_state_cap & 0xff);
+	hw_max = dev_priv->rps.hw_max;
 	hw_min = ((rp_state_cap & 0xff0000) >> 16);
 
 	if (val < hw_min || val > hw_max || val > dev_priv->rps.max_delay) {
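
The sysfs change above separates two ceilings: rps.hw_max, which gen6_enable_rps raises when the PCU reports overclocking support, and the stock maximum still read from GEN6_RP_STATE_CAP (non_oc_max). Requests between the two are accepted but logged as overclocking. A small sketch of that validation, with made-up frequencies:

/* Sketch of the max-frequency validation above: the writable ceiling is
 * hw_max (raised when overclocking is supported), while non_oc_max is the
 * stock limit from RP_STATE_CAP.  Values are illustrative only. */
#include <stdio.h>

#define GT_FREQUENCY_MULTIPLIER 50 /* RPS units are 50MHz steps */

static int validate_max_freq(unsigned val, unsigned hw_min, unsigned hw_max,
			     unsigned non_oc_max)
{
	if (val < hw_min || val > hw_max)
		return -1; /* -EINVAL in the driver */
	if (val > non_oc_max)
		printf("User requested overclocking to %u MHz\n",
		       val * GT_FREQUENCY_MULTIPLIER);
	return 0;
}

int main(void)
{
	/* stock max 22 (1100MHz), overclock headroom up to 26 (1300MHz) */
	printf("%d\n", validate_max_freq(24, 10, 26, 22)); /* ok, logs OC */
	printf("%d\n", validate_max_freq(30, 10, 26, 22)); /* rejected */
	return 0;
}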
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 55ffba1..95070b2 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -351,12 +351,14 @@
 		dev_priv->lvds_ssc_freq =
 			intel_bios_ssc_frequency(dev, general->ssc_freq);
 		dev_priv->display_clock_mode = general->display_clock_mode;
-		DRM_DEBUG_KMS("BDB_GENERAL_FEATURES int_tv_support %d int_crt_support %d lvds_use_ssc %d lvds_ssc_freq %d display_clock_mode %d\n",
+		dev_priv->fdi_rx_polarity_inverted = general->fdi_rx_polarity_inverted;
+		DRM_DEBUG_KMS("BDB_GENERAL_FEATURES int_tv_support %d int_crt_support %d lvds_use_ssc %d lvds_ssc_freq %d display_clock_mode %d fdi_rx_polarity_inverted %d\n",
 			      dev_priv->int_tv_support,
 			      dev_priv->int_crt_support,
 			      dev_priv->lvds_use_ssc,
 			      dev_priv->lvds_ssc_freq,
-			      dev_priv->display_clock_mode);
+			      dev_priv->display_clock_mode,
+			      dev_priv->fdi_rx_polarity_inverted);
 	}
 }
 
@@ -692,6 +694,9 @@
 	struct bdb_header *bdb = NULL;
 	u8 __iomem *bios = NULL;
 
+	if (HAS_PCH_NOP(dev))
+		return -ENODEV;
+
 	init_vbt_defaults(dev_priv);
 
 	/* XXX Should this validation be moved to intel_opregion.c? */
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index 36e57f9..e088d6f 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -127,7 +127,9 @@
         /* bits 3 */
 	u8 disable_smooth_vision:1;
 	u8 single_dvi:1;
-	u8 rsvd9:6; /* finish byte */
+	u8 rsvd9:1;
+	u8 fdi_rx_polarity_inverted:1;
+	u8 rsvd10:4; /* finish byte */
 
         /* bits 4 */
 	u8 legacy_monitor_detect;
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 1d8d63a..58b4a53 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -787,10 +787,8 @@
 
 	drm_sysfs_connector_add(connector);
 
-	if (I915_HAS_HOTPLUG(dev))
-		connector->polled = DRM_CONNECTOR_POLL_HPD;
-	else
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+	if (!I915_HAS_HOTPLUG(dev))
+		intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 
 	/*
 	 * Configure the automatic hotplug detection stuff
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 22524cb..26a0a57 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -924,7 +924,7 @@
 	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
-	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 	int type = intel_encoder->type;
 	uint32_t temp;
 
@@ -958,7 +958,7 @@
 	struct drm_encoder *encoder = &intel_encoder->base;
 	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
 	enum pipe pipe = intel_crtc->pipe;
-	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 	enum port port = intel_ddi_get_encoder_port(intel_encoder);
 	int type = intel_encoder->type;
 	uint32_t temp;
@@ -1223,7 +1223,7 @@
 	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
 	struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
 	enum port port = intel_ddi_get_encoder_port(intel_encoder);
-	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 
 	if (cpu_transcoder != TRANSCODER_EDP)
 		I915_WRITE(TRANS_CLK_SEL(cpu_transcoder),
@@ -1233,7 +1233,7 @@
 void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc)
 {
 	struct drm_i915_private *dev_priv = intel_crtc->base.dev->dev_private;
-	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 
 	if (cpu_transcoder != TRANSCODER_EDP)
 		I915_WRITE(TRANS_CLK_SEL(cpu_transcoder),
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b700564..6e423e0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -892,7 +892,7 @@
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-	return intel_crtc->cpu_transcoder;
+	return intel_crtc->config.cpu_transcoder;
 }
 
 static void ironlake_wait_for_vblank(struct drm_device *dev, int pipe)
@@ -1227,8 +1227,8 @@
 	if (pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE)
 		state = true;
 
-	if (IS_HASWELL(dev_priv->dev) && cpu_transcoder != TRANSCODER_EDP &&
-	    !(I915_READ(HSW_PWR_WELL_DRIVER) & HSW_PWR_WELL_ENABLE)) {
+	if (!intel_using_power_well(dev_priv->dev) &&
+	    cpu_transcoder != TRANSCODER_EDP) {
 		cur_state = false;
 	} else {
 		reg = PIPECONF(cpu_transcoder);
@@ -2002,8 +2002,10 @@
 		alignment = 0;
 		break;
 	case I915_TILING_Y:
-		/* FIXME: Is this true? */
-		DRM_ERROR("Y tiled not allowed for scan out buffers\n");
+	/* Despite the check in framebuffer_init, userspace can still screw
+	 * us over and change the tiling after the fact. Only pinned buffers
+	 * can't change their tiling. */
+		DRM_DEBUG_DRIVER("Y tiled not allowed for scan out buffers\n");
 		return -EINVAL;
 	default:
 		BUG();
@@ -3201,7 +3203,7 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 
 	assert_transcoder_disabled(dev_priv, TRANSCODER_A);
 
@@ -3576,7 +3578,7 @@
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
 	int plane = intel_crtc->plane;
-	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 
 	if (!intel_crtc->active)
 		return;
@@ -3597,9 +3599,13 @@
 
 	intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder);
 
-	/* Disable PF */
-	I915_WRITE(PF_CTL(pipe), 0);
-	I915_WRITE(PF_WIN_SZ(pipe), 0);
+	/* XXX: Once we have proper panel fitter state tracking implemented with
+	 * hardware state read/check support we should switch to only disable
+	 * the panel fitter when we know it's used. */
+	if (intel_using_power_well(dev)) {
+		I915_WRITE(PF_CTL(pipe), 0);
+		I915_WRITE(PF_WIN_SZ(pipe), 0);
+	}
 
 	intel_ddi_disable_pipe_clock(intel_crtc);
 
@@ -3632,7 +3638,7 @@
 
 	/* Stop saying we're using TRANSCODER_EDP because some other CRTC might
 	 * start using it. */
-	intel_crtc->cpu_transcoder = (enum transcoder) intel_crtc->pipe;
+	intel_crtc->config.cpu_transcoder = (enum transcoder) intel_crtc->pipe;
 
 	intel_ddi_put_crtc_pll(crtc);
 }
@@ -3718,6 +3724,26 @@
 		encoder->enable(encoder);
 }
 
+static void i9xx_pfit_disable(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum pipe pipe;
+	uint32_t pctl = I915_READ(PFIT_CONTROL);
+
+	assert_pipe_disabled(dev_priv, crtc->pipe);
+
+	if (INTEL_INFO(dev)->gen >= 4)
+		pipe = (pctl & PFIT_PIPE_MASK) >> PFIT_PIPE_SHIFT;
+	else
+		pipe = PIPE_B;
+
+	if (pipe == crtc->pipe) {
+		DRM_DEBUG_DRIVER("disabling pfit, current: 0x%08x\n", pctl);
+		I915_WRITE(PFIT_CONTROL, 0);
+	}
+}
+
 static void i9xx_crtc_disable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -3726,8 +3752,6 @@
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
 	int plane = intel_crtc->plane;
-	u32 pctl;
-
 
 	if (!intel_crtc->active)
 		return;
@@ -3747,11 +3771,7 @@
 	intel_disable_plane(dev_priv, plane, pipe);
 	intel_disable_pipe(dev_priv, pipe);
 
-	/* Disable pannel fitter if it is on this pipe. */
-	pctl = I915_READ(PFIT_CONTROL);
-	if ((pctl & PFIT_ENABLE) &&
-	    ((pctl & PFIT_PIPE_MASK) >> PFIT_PIPE_SHIFT) == pipe)
-		I915_WRITE(PFIT_CONTROL, 0);
+	i9xx_pfit_disable(intel_crtc);
 
 	intel_disable_pll(dev_priv, pipe);
 
@@ -3983,9 +4003,9 @@
 		adjusted_mode->hsync_start == adjusted_mode->hdisplay)
 		return false;
 
-	if ((IS_G4X(dev) || IS_VALLEYVIEW(dev)) && pipe_config->pipe_bpp > 10) {
+	if ((IS_G4X(dev) || IS_VALLEYVIEW(dev)) && pipe_config->pipe_bpp > 10*3) {
 		pipe_config->pipe_bpp = 10*3; /* 12bpc is gen5+ */
-	} else if (INTEL_INFO(dev)->gen <= 4 && pipe_config->pipe_bpp > 8) {
+	} else if (INTEL_INFO(dev)->gen <= 4 && pipe_config->pipe_bpp > 8*3) {
 		/* only a 8bpc pipe, with 6bpc dither through the panel fitter
 		 * for lvds. */
 		pipe_config->pipe_bpp = 8*3;
@@ -4474,7 +4494,7 @@
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum pipe pipe = intel_crtc->pipe;
-	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 	uint32_t vsyncshift;
 
 	if (!IS_GEN2(dev) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) {
@@ -4956,13 +4976,6 @@
 	tmp |= (0x12 << 24);
 	intel_sbi_write(dev_priv, 0x8008, tmp, SBI_MPHY);
 
-	if (!is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x808C, SBI_MPHY);
-		tmp &= ~(0x3 << 6);
-		tmp |= (1 << 6) | (1 << 0);
-		intel_sbi_write(dev_priv, 0x808C, tmp, SBI_MPHY);
-	}
-
 	if (is_sdv) {
 		tmp = intel_sbi_read(dev_priv, 0x800C, SBI_MPHY);
 		tmp |= 0x7FFF;
@@ -5223,7 +5236,7 @@
 {
 	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 	uint32_t val;
 
 	val = I915_READ(PIPECONF(cpu_transcoder));
@@ -5417,7 +5430,7 @@
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = crtc->pipe;
-	enum transcoder transcoder = crtc->cpu_transcoder;
+	enum transcoder transcoder = crtc->config.cpu_transcoder;
 
 	if (INTEL_INFO(dev)->gen >= 5) {
 		I915_WRITE(PIPE_DATA_M1(transcoder), TU_SIZE(m_n->tu) | m_n->gmch_m);
@@ -5469,7 +5482,8 @@
 }
 
 static uint32_t ironlake_compute_dpll(struct intel_crtc *intel_crtc,
-				      intel_clock_t *clock, u32 fp)
+				      intel_clock_t *clock, u32 *fp,
+				      intel_clock_t *reduced_clock, u32 *fp2)
 {
 	struct drm_crtc *crtc = &intel_crtc->base;
 	struct drm_device *dev = crtc->dev;
@@ -5503,13 +5517,16 @@
 	if (is_lvds) {
 		if ((intel_panel_use_ssc(dev_priv) &&
 		     dev_priv->lvds_ssc_freq == 100) ||
-		    intel_is_dual_link_lvds(dev))
+		    (HAS_PCH_IBX(dev) && intel_is_dual_link_lvds(dev)))
 			factor = 25;
 	} else if (is_sdvo && is_tv)
 		factor = 20;
 
 	if (clock->m < factor * clock->n)
-		fp |= FP_CB_TUNE;
+		*fp |= FP_CB_TUNE;
+
+	if (fp2 && (reduced_clock->m < factor * reduced_clock->n))
+		*fp2 |= FP_CB_TUNE;
 
 	dpll = 0;
 
@@ -5596,7 +5613,7 @@
 	WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)),
 	     "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev));
 
-	intel_crtc->cpu_transcoder = pipe;
+	intel_crtc->config.cpu_transcoder = pipe;
 
 	ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock,
 				     &has_reduced_clock, &reduced_clock);
@@ -5626,7 +5643,8 @@
 		fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 |
 			reduced_clock.m2;
 
-	dpll = ironlake_compute_dpll(intel_crtc, &clock, fp);
+	dpll = ironlake_compute_dpll(intel_crtc, &clock, &fp, &reduced_clock,
+				     has_reduced_clock ? &fp2 : NULL);
 
 	DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe);
 	drm_mode_debug_printmodeline(mode);
@@ -5779,9 +5797,9 @@
 	}
 
 	if (is_cpu_edp)
-		intel_crtc->cpu_transcoder = TRANSCODER_EDP;
+		intel_crtc->config.cpu_transcoder = TRANSCODER_EDP;
 	else
-		intel_crtc->cpu_transcoder = pipe;
+		intel_crtc->config.cpu_transcoder = pipe;
 
 	/* We are not sure yet this won't happen. */
 	WARN(!HAS_PCH_LPT(dev), "Unexpected PCH type %d\n",
@@ -5790,7 +5808,7 @@
 	WARN(num_connectors != 1, "%d connectors attached to pipe %c\n",
 	     num_connectors, pipe_name(pipe));
 
-	WARN_ON(I915_READ(PIPECONF(intel_crtc->cpu_transcoder)) &
+	WARN_ON(I915_READ(PIPECONF(intel_crtc->config.cpu_transcoder)) &
 		(PIPECONF_ENABLE | I965_PIPECONF_ACTIVE));
 
 	WARN_ON(I915_READ(DSPCNTR(plane)) & DISPLAY_PLANE_ENABLE);
@@ -5841,7 +5859,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t tmp;
 
-	tmp = I915_READ(PIPECONF(crtc->cpu_transcoder));
+	tmp = I915_READ(PIPECONF(crtc->config.cpu_transcoder));
 	if (!(tmp & PIPECONF_ENABLE))
 		return false;
 
@@ -6809,7 +6827,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder;
+	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 	struct drm_display_mode *mode;
 	int htot = I915_READ(HTOTAL(cpu_transcoder));
 	int hsync = I915_READ(HSYNC(cpu_transcoder));
@@ -7708,22 +7726,25 @@
 	if (crtc->enabled)
 		*prepare_pipes |= 1 << intel_crtc->pipe;
 
-	/* We only support modeset on one single crtc, hence we need to do that
-	 * only for the passed in crtc iff we change anything else than just
-	 * disable crtcs.
-	 *
-	 * This is actually not true, to be fully compatible with the old crtc
-	 * helper we automatically disable _any_ output (i.e. doesn't need to be
-	 * connected to the crtc we're modesetting on) if it's disconnected.
-	 * Which is a rather nutty api (since changed the output configuration
-	 * without userspace's explicit request can lead to confusion), but
-	 * alas. Hence we currently need to modeset on all pipes we prepare. */
+	/*
+	 * For simplicity do a full modeset on any pipe where the output routing
+	 * changed. We could be more clever, but that would require us to be
+	 * more careful with calling the relevant encoder->mode_set functions.
+	 */
 	if (*prepare_pipes)
 		*modeset_pipes = *prepare_pipes;
 
 	/* ... and mask these out. */
 	*modeset_pipes &= ~(*disable_pipes);
 	*prepare_pipes &= ~(*disable_pipes);
+
+	/*
+	 * HACK: We don't (yet) fully support global modesets. intel_set_config
+	 * obeys this rule, but the modeset restore mode of
+	 * intel_modeset_setup_hw_state does not.
+	 */
+	*modeset_pipes &= 1 << intel_crtc->pipe;
+	*prepare_pipes &= 1 << intel_crtc->pipe;
 }
 
 static bool intel_crtc_in_use(struct drm_crtc *crtc)
@@ -7916,9 +7937,9 @@
 	}
 }
 
-int intel_set_mode(struct drm_crtc *crtc,
-		   struct drm_display_mode *mode,
-		   int x, int y, struct drm_framebuffer *fb)
+static int __intel_set_mode(struct drm_crtc *crtc,
+			    struct drm_display_mode *mode,
+			    int x, int y, struct drm_framebuffer *fb)
 {
 	struct drm_device *dev = crtc->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -7969,10 +7990,12 @@
 	 * to set it here already despite that we pass it down the callchain.
 	 */
 	if (modeset_pipes) {
+		enum transcoder tmp = to_intel_crtc(crtc)->config.cpu_transcoder;
 		crtc->mode = *mode;
 		/* mode_set/enable/disable functions rely on a correct pipe
 		 * config. */
 		to_intel_crtc(crtc)->config = *pipe_config;
+		to_intel_crtc(crtc)->config.cpu_transcoder = tmp;
 	}
 
 	/* Only after disabling all output pipelines that will be changed can we
@@ -8012,8 +8035,6 @@
 	if (ret && crtc->enabled) {
 		crtc->hwmode = *saved_hwmode;
 		crtc->mode = *saved_mode;
-	} else {
-		intel_modeset_check_state(dev);
 	}
 
 out:
@@ -8022,6 +8043,20 @@
 	return ret;
 }
 
+int intel_set_mode(struct drm_crtc *crtc,
+		     struct drm_display_mode *mode,
+		     int x, int y, struct drm_framebuffer *fb)
+{
+	int ret;
+
+	ret = __intel_set_mode(crtc, mode, x, y, fb);
+
+	if (ret == 0)
+		intel_modeset_check_state(crtc->dev);
+
+	return ret;
+}
+
 void intel_crtc_restore_mode(struct drm_crtc *crtc)
 {
 	intel_set_mode(crtc, &crtc->mode, crtc->x, crtc->y, crtc->fb);
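
The split above yields two entry points: __intel_set_mode does the actual programming, while the public intel_set_mode additionally runs intel_modeset_check_state on success, so the force-restore path can call the raw variant without asserting on intermediate hardware state. A toy sketch of that checked-wrapper pattern (all names illustrative):

/* Sketch of the raw/checked split introduced above: the checked wrapper is
 * the public entry point; restore paths call the raw version to avoid
 * verifying intermediate state.  Names are illustrative only. */
#include <stdio.h>

static int __set_mode(int crtc)
{
	printf("programming crtc %d\n", crtc);
	return 0; /* the real function can fail and roll back */
}

static void check_state(void)
{
	printf("cross-checking sw/hw state\n");
}

static int set_mode(int crtc) /* userspace-facing, always checked */
{
	int ret = __set_mode(crtc);
	if (ret == 0)
		check_state();
	return ret;
}

int main(void)
{
	set_mode(0);   /* normal modeset: programmed, then verified */
	__set_mode(1); /* restore path: skip the (bogus) intermediate check */
	return 0;
}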
@@ -8371,7 +8406,7 @@
 	/* Swap pipes & planes for FBC on pre-965 */
 	intel_crtc->pipe = pipe;
 	intel_crtc->plane = pipe;
-	intel_crtc->cpu_transcoder = pipe;
+	intel_crtc->config.cpu_transcoder = pipe;
 	if (IS_MOBILE(dev) && IS_GEN3(dev)) {
 		DRM_DEBUG_KMS("swapping pipes & planes for FBC\n");
 		intel_crtc->plane = !pipe;
@@ -8462,7 +8497,7 @@
 		I915_WRITE(PFIT_CONTROL, 0);
 	}
 
-	if (!(HAS_DDI(dev) && (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES)))
+	if (!IS_ULT(dev))
 		intel_crt_init(dev);
 
 	if (HAS_DDI(dev)) {
@@ -8991,6 +9026,9 @@
 
 	intel_init_pm(dev);
 
+	if (INTEL_INFO(dev)->num_pipes == 0)
+		return;
+
 	intel_init_display(dev);
 
 	if (IS_GEN2(dev)) {
@@ -9093,7 +9131,7 @@
 	u32 reg;
 
 	/* Clear any frame start delays used for debugging left by the BIOS */
-	reg = PIPECONF(crtc->cpu_transcoder);
+	reg = PIPECONF(crtc->config.cpu_transcoder);
 	I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK);
 
 	/* We need to sanitize the plane -> pipe mapping first because this will
@@ -9259,7 +9297,7 @@
 			}
 
 			crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
-			crtc->cpu_transcoder = TRANSCODER_EDP;
+			crtc->config.cpu_transcoder = TRANSCODER_EDP;
 
 			DRM_DEBUG_KMS("Pipe %c using transcoder EDP\n",
 				      pipe_name(pipe));
@@ -9269,7 +9307,10 @@
 setup_pipes:
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
 			    base.head) {
+		enum transcoder tmp = crtc->config.cpu_transcoder;
 		memset(&crtc->config, 0, sizeof(crtc->config));
+		crtc->config.cpu_transcoder = tmp;
+
 		crtc->active = dev_priv->display.get_pipe_config(crtc,
 								 &crtc->config);
 
@@ -9330,10 +9371,16 @@
 	}
 
 	if (force_restore) {
+		/*
+		 * We need to use raw interfaces for restoring state to avoid
+		 * checking (bogus) intermediate states.
+		 */
 		for_each_pipe(pipe) {
 			struct drm_crtc *crtc =
 				dev_priv->pipe_to_crtc_mapping[pipe];
-			intel_crtc_restore_mode(crtc);
+
+			__intel_set_mode(crtc, &crtc->mode, crtc->x, crtc->y,
+					 crtc->fb);
 		}
 		list_for_each_entry(plane, &dev->mode_config.plane_list, head)
 			intel_plane_restore(plane);
@@ -9398,6 +9445,9 @@
 	/* flush any delayed tasks or pending work */
 	flush_scheduled_work();
 
+	/* destroy backlight, if any, before the connectors */
+	intel_panel_destroy_backlight(dev);
+
 	drm_mode_config_cleanup(dev);
 
 	intel_cleanup_overlay(dev);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index b30e82b..a328837 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -353,10 +353,14 @@
 			aux_clock_divider = 200; /* SNB & IVB eDP input clock at 400Mhz */
 		else
 			aux_clock_divider = 225; /* eDP input clock at 450Mhz */
-	} else if (HAS_PCH_SPLIT(dev))
+	} else if (dev_priv->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE) {
+		/* Workaround for non-ULT HSW */
+		aux_clock_divider = 74;
+	} else if (HAS_PCH_SPLIT(dev)) {
 		aux_clock_divider = DIV_ROUND_UP(intel_pch_rawclk(dev), 2);
-	else
+	} else {
 		aux_clock_divider = intel_hrawclk(dev) / 2;
+	}
 
 	if (IS_GEN6(dev))
 		precharge = 3;
@@ -2470,17 +2474,14 @@
 static void
 intel_dp_destroy(struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
 	struct intel_connector *intel_connector = to_intel_connector(connector);
 
 	if (!IS_ERR_OR_NULL(intel_connector->edid))
 		kfree(intel_connector->edid);
 
-	if (is_edp(intel_dp)) {
-		intel_panel_destroy_backlight(dev);
+	if (is_edp(intel_dp))
 		intel_panel_fini(&intel_connector->panel);
-	}
 
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
@@ -2789,7 +2790,6 @@
 	drm_connector_init(dev, connector, &intel_dp_connector_funcs, type);
 	drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs);
 
-	connector->polled = DRM_CONNECTOR_POLL_HPD;
 	connector->interlace_allowed = true;
 	connector->doublescan_allowed = 0;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index d7bd031..b5b6d19 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -171,6 +171,10 @@
 
 	/* Cached EDID for eDP and LVDS. May hold ERR_PTR for invalid EDID. */
 	struct edid *edid;
+
+	/* since POLL and HPD connectors may use the same HPD line, keep the native
+	   state of connector->polled in case hotplug storm detection changes it */
+	u8 polled;
 };
 
 struct intel_crtc_config {
@@ -184,6 +188,10 @@
 	 * between pch encoders and cpu encoders. */
 	bool has_pch_encoder;
 
+	/* CPU Transcoder for the pipe. Currently this can only differ from the
+	 * pipe on Haswell (where we have a special eDP transcoder). */
+	enum transcoder cpu_transcoder;
+
 	/*
 	 * Use reduced/limited/broadcast rgb range, compressing from the full
 	 * range fed into the crtcs.
@@ -222,7 +230,6 @@
 	struct drm_crtc base;
 	enum pipe pipe;
 	enum plane plane;
-	enum transcoder cpu_transcoder;
 	u8 lut_r[256], lut_g[256], lut_b[256];
 	/*
 	 * Whether the crtc and the connected output pipeline is active. Implies
@@ -693,6 +700,7 @@
 extern void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
 extern void intel_gpu_ips_teardown(void);
 
+extern bool intel_using_power_well(struct drm_device *dev);
 extern void intel_init_power_well(struct drm_device *dev);
 extern void intel_set_power_well(struct drm_device *dev, bool enable);
 extern void intel_enable_gt_powersave(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 8d81c929..0e19e57 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -283,6 +283,9 @@
 	struct drm_mode_config *config = &dev->mode_config;
 	struct drm_plane *plane;
 
+	if (INTEL_INFO(dev)->num_pipes == 0)
+		return;
+
 	drm_modeset_lock_all(dev);
 
 	ret = drm_fb_helper_restore_fbdev_mode(&dev_priv->fbdev->helper);
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index ee4a8da..3e6a3ef 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -294,8 +294,8 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(intel_crtc->cpu_transcoder);
-	u32 data_reg = hsw_infoframe_data_reg(frame, intel_crtc->cpu_transcoder);
+	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(intel_crtc->config.cpu_transcoder);
+	u32 data_reg = hsw_infoframe_data_reg(frame, intel_crtc->config.cpu_transcoder);
 	unsigned int i, len = DIP_HEADER_SIZE + frame->len;
 	u32 val = I915_READ(ctl_reg);
 
@@ -570,7 +570,7 @@
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
-	u32 reg = HSW_TVIDEO_DIP_CTL(intel_crtc->cpu_transcoder);
+	u32 reg = HSW_TVIDEO_DIP_CTL(intel_crtc->config.cpu_transcoder);
 	u32 val = I915_READ(reg);
 
 	assert_hdmi_port_disabled(intel_hdmi);
@@ -998,7 +998,6 @@
 			   DRM_MODE_CONNECTOR_HDMIA);
 	drm_connector_helper_add(connector, &intel_hdmi_connector_helper_funcs);
 
-	connector->polled = DRM_CONNECTOR_POLL_HPD;
 	connector->interlace_allowed = 1;
 	connector->doublescan_allowed = 0;
 
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index ef4744e..5d245031 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -522,7 +522,9 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret, i;
 
-	if (HAS_PCH_SPLIT(dev))
+	if (HAS_PCH_NOP(dev))
+		return 0;
+	else if (HAS_PCH_SPLIT(dev))
 		dev_priv->gpio_mmio_base = PCH_GPIOA - GPIOA;
 	else if (IS_VALLEYVIEW(dev))
 		dev_priv->gpio_mmio_base = VLV_DISPLAY_BASE;
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index ca2d903..f36f1ba 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -631,7 +631,6 @@
 	if (!IS_ERR_OR_NULL(lvds_connector->base.edid))
 		kfree(lvds_connector->base.edid);
 
-	intel_panel_destroy_backlight(connector->dev);
 	intel_panel_fini(&lvds_connector->base.panel);
 
 	drm_sysfs_connector_remove(connector);
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 7874cec..eb5e6e9 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -428,6 +428,9 @@
 
 	intel_panel_init_backlight(dev);
 
+	if (WARN_ON(dev_priv->backlight.device))
+		return -ENODEV;
+
 	memset(&props, 0, sizeof(props));
 	props.type = BACKLIGHT_RAW;
 	props.brightness = dev_priv->backlight.level;
@@ -453,8 +456,10 @@
 void intel_panel_destroy_backlight(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	if (dev_priv->backlight.device)
+	if (dev_priv->backlight.device) {
 		backlight_device_unregister(dev_priv->backlight.device);
+		dev_priv->backlight.device = NULL;
+	}
 }
 #else
 int intel_panel_setup_backlight(struct drm_connector *connector)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 13a0666..e34ad96 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2558,8 +2558,8 @@
 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
 	gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
-	/* In units of 100MHz */
-	dev_priv->rps.max_delay = rp_state_cap & 0xff;
+	/* In units of 50MHz */
+	dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
 	dev_priv->rps.min_delay = (rp_state_cap & 0xff0000) >> 16;
 	dev_priv->rps.cur_delay = 0;
 
@@ -2643,10 +2643,10 @@
 		pcu_mbox = 0;
 		ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
 		if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
-			DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max from %dMHz to %dMHz\n",
+			DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
 					 (dev_priv->rps.max_delay & 0xff) * 50,
 					 (pcu_mbox & 0xff) * 50);
-			dev_priv->rps.max_delay = pcu_mbox & 0xff;
+			dev_priv->rps.hw_max = pcu_mbox & 0xff;
 		}
 	} else {
 		DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
@@ -2684,8 +2684,8 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int min_freq = 15;
-	int gpu_freq;
-	unsigned int ia_freq, max_ia_freq;
+	unsigned int gpu_freq;
+	unsigned int max_ia_freq, min_ring_freq;
 	int scaling_factor = 180;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
@@ -2701,6 +2701,10 @@
 	/* Convert from kHz to MHz */
 	max_ia_freq /= 1000;
 
+	min_ring_freq = I915_READ(MCHBAR_MIRROR_BASE_SNB + DCLK);
+	/* convert DDR frequency from units of 133.3MHz to bandwidth */
+	min_ring_freq = (2 * 4 * min_ring_freq + 2) / 3;
+
 	/*
 	 * For each potential GPU frequency, load a ring frequency we'd like
 	 * to use for memory access.  We do this by specifying the IA frequency
@@ -2709,21 +2713,32 @@
 	for (gpu_freq = dev_priv->rps.max_delay; gpu_freq >= dev_priv->rps.min_delay;
 	     gpu_freq--) {
 		int diff = dev_priv->rps.max_delay - gpu_freq;
+		unsigned int ia_freq = 0, ring_freq = 0;
 
-		/*
-		 * For GPU frequencies less than 750MHz, just use the lowest
-		 * ring freq.
-		 */
-		if (gpu_freq < min_freq)
-			ia_freq = 800;
-		else
-			ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
-		ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
-		ia_freq <<= GEN6_PCODE_FREQ_IA_RATIO_SHIFT;
+		if (IS_HASWELL(dev)) {
+			ring_freq = (gpu_freq * 5 + 3) / 4;
+			ring_freq = max(min_ring_freq, ring_freq);
+			/* leave ia_freq as the default, chosen by cpufreq */
+		} else {
+			/* On older processors, there is no separate ring
+			 * clock domain, so in order to boost the bandwidth
+			 * of the ring, we need to upclock the CPU (ia_freq).
+			 *
+			 * For GPU frequencies less than 750MHz,
+			 * just use the lowest ring freq.
+			 */
+			if (gpu_freq < min_freq)
+				ia_freq = 800;
+			else
+				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
+			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+		}
 
 		sandybridge_pcode_write(dev_priv,
 					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
-					ia_freq | gpu_freq);
+					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+					gpu_freq);
 	}
 }
 
@@ -3575,6 +3590,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
+	uint32_t val;
 
 	/*
 	 * On Ibex Peak and Cougar Point, we need to disable clock
@@ -3587,8 +3603,17 @@
 	/* The below fixes the weird display corruption, a few pixels shifted
 	 * downward, on (only) LVDS of some HP laptops with IVY.
 	 */
-	for_each_pipe(pipe)
-		I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_CHICKEN2_TIMING_OVERRIDE);
+	for_each_pipe(pipe) {
+		val = I915_READ(TRANS_CHICKEN2(pipe));
+		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+		if (dev_priv->fdi_rx_polarity_inverted)
+			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
+		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
+		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
+		I915_WRITE(TRANS_CHICKEN2(pipe), val);
+	}
 	/* WADP0ClockGatingDisable */
 	for_each_pipe(pipe) {
 		I915_WRITE(TRANS_CHICKEN1(pipe),
@@ -3890,7 +3915,8 @@
 	snpcr |= GEN6_MBC_SNPCR_MED;
 	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
 
-	cpt_init_clock_gating(dev);
+	if (!HAS_PCH_NOP(dev))
+		cpt_init_clock_gating(dev);
 
 	gen6_check_mch_setup(dev);
 }
@@ -4084,6 +4110,22 @@
 	dev_priv->display.init_clock_gating(dev);
 }
 
+/**
+ * We should only use the power well if we explicitly asked the hardware to
+ * enable it, so check if it's enabled and also check if we've requested it to
+ * be enabled.
+ */
+bool intel_using_power_well(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (IS_HASWELL(dev))
+		return I915_READ(HSW_PWR_WELL_DRIVER) ==
+		       (HSW_PWR_WELL_ENABLE | HSW_PWR_WELL_STATE);
+	else
+		return true;
+}
+
 void intel_set_power_well(struct drm_device *dev, bool enable)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4190,7 +4232,6 @@
 			}
 			dev_priv->display.init_clock_gating = gen6_init_clock_gating;
 		} else if (IS_IVYBRIDGE(dev)) {
-			/* FIXME: detect B0+ stepping and use auto training */
 			if (SNB_READ_WM0_LATENCY()) {
 				dev_priv->display.update_wm = ivybridge_update_wm;
 				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 298dc85..a618a6a 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1231,12 +1231,8 @@
 	struct intel_sdvo_connector *intel_sdvo_connector =
 		to_intel_sdvo_connector(&connector->base);
 	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(&connector->base);
-	struct drm_i915_private *dev_priv = intel_sdvo->base.base.dev->dev_private;
 	u16 active_outputs;
 
-	if (!(I915_READ(intel_sdvo->sdvo_reg) & SDVO_ENABLE))
-		return false;
-
 	intel_sdvo_get_active_outputs(intel_sdvo, &active_outputs);
 
 	if (active_outputs & intel_sdvo_connector->output_flag)
@@ -1251,11 +1247,13 @@
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_sdvo *intel_sdvo = to_intel_sdvo(&encoder->base);
+	u16 active_outputs;
 	u32 tmp;
 
 	tmp = I915_READ(intel_sdvo->sdvo_reg);
+	intel_sdvo_get_active_outputs(intel_sdvo, &active_outputs);
 
-	if (!(tmp & SDVO_ENABLE))
+	if (!(tmp & SDVO_ENABLE) && (active_outputs == 0))
 		return false;
 
 	if (HAS_PCH_CPT(dev))
@@ -2276,7 +2274,6 @@
 	connector = &intel_connector->base;
 	if (intel_sdvo_get_hotplug_support(intel_sdvo) &
 		intel_sdvo_connector->output_flag) {
-		connector->polled = DRM_CONNECTOR_POLL_HPD;
 		intel_sdvo->hotplug_active |= intel_sdvo_connector->output_flag;
 		/* Some SDVO devices have one-shot hotplug interrupts.
 		 * Ensure that they get re-enabled when an interrupt happens.
@@ -2284,7 +2281,7 @@
 		intel_encoder->hot_plug = intel_sdvo_enable_hotplug;
 		intel_sdvo_enable_hotplug(intel_encoder);
 	} else {
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT;
+		intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT;
 	}
 	encoder->encoder_type = DRM_MODE_ENCODER_TMDS;
 	connector->connector_type = DRM_MODE_CONNECTOR_DVID;
@@ -2353,7 +2350,7 @@
 
 	intel_connector = &intel_sdvo_connector->base;
 	connector = &intel_connector->base;
-	connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+	intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 	encoder->encoder_type = DRM_MODE_ENCODER_DAC;
 	connector->connector_type = DRM_MODE_CONNECTOR_VGA;
 
@@ -2746,7 +2743,6 @@
 	struct intel_sdvo *intel_sdvo;
 	u32 hotplug_mask;
 	int i;
-
 	intel_sdvo = kzalloc(sizeof(struct intel_sdvo), GFP_KERNEL);
 	if (!intel_sdvo)
 		return false;
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 6673726..b945bc5 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1613,7 +1613,7 @@
 	 *
 	 * More recent chipsets favour HDMI rather than integrated S-Video.
 	 */
-	connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+	intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 
 	drm_connector_init(dev, connector, &intel_tv_connector_funcs,
 			   DRM_MODE_CONNECTOR_SVIDEO);
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 1a86242..489cb8c 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -555,6 +555,7 @@
 
 static int qxl_ttm_debugfs_init(struct qxl_device *qdev)
 {
+#if defined(CONFIG_DEBUG_FS)
 	static struct drm_info_list qxl_mem_types_list[QXL_DEBUGFS_MEM_TYPES];
 	static char qxl_mem_types_names[QXL_DEBUGFS_MEM_TYPES][32];
 	unsigned i;
@@ -574,4 +575,7 @@
 
 	}
 	return qxl_debugfs_add_files(qdev, qxl_mem_types_list, i);
+#else
+	return 0;
+#endif
 }
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index bf17252..86c5e36 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -76,7 +76,7 @@
 	evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
 	evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \
 	atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \
-	si_blit_shaders.o radeon_prime.o
+	si_blit_shaders.o radeon_prime.o radeon_uvd.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 21a892c..6d6fdb3 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -557,6 +557,9 @@
 		/* use frac fb div on APUs */
 		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev))
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
+		/* use frac fb div on RS780/RS880 */
+		if ((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 		if (ASIC_IS_DCE32(rdev) && mode->clock > 165000)
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 	} else {
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 305a657..124c193 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -84,6 +84,217 @@
 	}
 }
 
+static int sumo_set_uvd_clock(struct radeon_device *rdev, u32 clock,
+			      u32 cntl_reg, u32 status_reg)
+{
+	int r, i;
+	struct atom_clock_dividers dividers;
+
+	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					   clock, false, &dividers);
+	if (r)
+		return r;
+
+	WREG32_P(cntl_reg, dividers.post_div, ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK));
+
+	for (i = 0; i < 100; i++) {
+		if (RREG32(status_reg) & DCLK_STATUS)
+			break;
+		mdelay(10);
+	}
+	if (i == 100)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+int sumo_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
+{
+	int r = 0;
+	u32 cg_scratch = RREG32(CG_SCRATCH1);
+
+	r = sumo_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
+	if (r)
+		goto done;
+	cg_scratch &= 0xffff0000;
+	cg_scratch |= vclk / 100; /* MHz */
+
+	r = sumo_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
+	if (r)
+		goto done;
+	cg_scratch &= 0x0000ffff;
+	cg_scratch |= (dclk / 100) << 16; /* MHz */
+
+done:
+	WREG32(CG_SCRATCH1, cg_scratch);
+
+	return r;
+}
+
+static int evergreen_uvd_calc_post_div(unsigned target_freq,
+				       unsigned vco_freq,
+				       unsigned *div)
+{
+	/* target larger than vco frequency ? */
+	if (vco_freq < target_freq)
+		return -1; /* forget it */
+
+	/* Fclk = Fvco / PDIV */
+	*div = vco_freq / target_freq;
+
+	/* we always need a frequency less than or equal to the target */
+	if ((vco_freq / *div) > target_freq)
+		*div += 1;
+
+	/* dividers above 5 must be even */
+	if (*div > 5 && *div % 2)
+		*div += 1;
+
+	/* out of range ? */
+	if (*div >= 128)
+		return -1; /* forget it */
+
+	return vco_freq / *div;
+}
+
+static int evergreen_uvd_send_upll_ctlreq(struct radeon_device *rdev)
+{
+	unsigned i;
+
+	/* assert UPLL_CTLREQ */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+	/* wait for CTLACK and CTLACK2 to get asserted */
+	for (i = 0; i < 100; ++i) {
+		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+		if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
+			break;
+		mdelay(10);
+	}
+	if (i == 100)
+		return -ETIMEDOUT;
+
+	/* deassert UPLL_CTLREQ */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+	return 0;
+}
+
+int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
+{
+	/* start off with something large */
+	int optimal_diff_score = 0x7FFFFFF;
+	unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
+	unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
+	unsigned vco_freq;
+	int r;
+
+	/* loop through vco from low to high */
+	for (vco_freq = 125000; vco_freq <= 250000; vco_freq += 100) {
+		unsigned fb_div = vco_freq / rdev->clock.spll.reference_freq * 16384;
+		int calc_clk, diff_score, diff_vclk, diff_dclk;
+		unsigned vclk_div, dclk_div;
+
+		/* fb div out of range ? */
+		if (fb_div > 0x03FFFFFF)
+			break; /* it can only get worse */
+
+		/* calc vclk with current vco freq. */
+		calc_clk = evergreen_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
+		if (calc_clk == -1)
+			break; /* vco is too big; stop searching */
+		diff_vclk = vclk - calc_clk;
+
+		/* calc dclk with current vco freq. */
+		calc_clk = evergreen_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
+		if (calc_clk == -1)
+			break; /* vco is too big; stop searching */
+		diff_dclk = dclk - calc_clk;
+
+		/* determine if this vco setting is better than current optimal settings */
+		diff_score = abs(diff_vclk) + abs(diff_dclk);
+		if (diff_score < optimal_diff_score) {
+			optimal_fb_div = fb_div;
+			optimal_vclk_div = vclk_div;
+			optimal_dclk_div = dclk_div;
+			optimal_vco_freq = vco_freq;
+			optimal_diff_score = diff_score;
+			if (optimal_diff_score == 0)
+				break; /* it can't get better than this */
+		}
+	}
+
+	/* set VCO_MODE to 1 */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
+
+	/* toggle UPLL_SLEEP to 1 then back to 0 */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
+
+	/* deassert UPLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	mdelay(1);
+
+	/* bypass vclk and dclk with bclk */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
+		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	/* put PLL in bypass mode */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
+
+	r = evergreen_uvd_send_upll_ctlreq(rdev);
+	if (r)
+		return r;
+
+	/* assert UPLL_RESET again */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
+
+	/* disable spread spectrum. */
+	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
+
+	/* set feedback divider */
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div), ~UPLL_FB_DIV_MASK);
+
+	/* set ref divider to 0 */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
+
+	if (optimal_vco_freq < 187500)
+		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
+	else
+		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
+
+	/* set PDIV_A and PDIV_B */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		UPLL_PDIV_A(optimal_vclk_div) | UPLL_PDIV_B(optimal_dclk_div),
+		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
+
+	/* give the PLL some time to settle */
+	mdelay(15);
+
+	/* deassert PLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	mdelay(15);
+
+	/* switch from bypass mode to normal mode */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
+
+	r = evergreen_uvd_send_upll_ctlreq(rdev);
+	if (r)
+		return r;
+
+	/* switch VCLK and DCLK selection */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
+		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	mdelay(100);
+
+	return 0;
+}
+
 void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev)
 {
 	u16 ctl, v;
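
evergreen_set_uvd_clocks above brute-forces the VCO range and, for each candidate, derives post dividers via evergreen_uvd_calc_post_div: Fclk = Fvco / PDIV, where the divider is bumped so the result never exceeds the target, dividers above 5 must be even, and anything at 128 or beyond is rejected. A standalone sketch of that divider calculation with example inputs (frequencies in the driver's clock units):

/* Standalone sketch of evergreen_uvd_calc_post_div() above: choose the post
 * divider so Fclk = Fvco / PDIV never exceeds the target, keep dividers
 * above 5 even, and reject anything at 128 or beyond. */
#include <stdio.h>

static int calc_post_div(unsigned target, unsigned vco, unsigned *div)
{
	if (vco < target)
		return -1;            /* VCO too slow for this target */
	*div = vco / target;          /* Fclk = Fvco / PDIV */
	if (vco / *div > target)      /* never overshoot the target */
		*div += 1;
	if (*div > 5 && *div % 2)     /* dividers above 5 must be even */
		*div += 1;
	if (*div >= 128)
		return -1;            /* out of range */
	return vco / *div;
}

int main(void)
{
	unsigned div;
	/* e.g. a 54000-unit vclk target against a 160000-unit VCO */
	int clk = calc_post_div(54000, 160000, &div);
	if (clk >= 0)
		printf("pdiv=%u -> %d (asked for 54000)\n", div, clk);
	return 0;
}

The outer loop then scores each VCO candidate by abs(diff_vclk) + abs(diff_dclk) and keeps the best, stopping early when both targets are hit exactly.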
@@ -608,6 +819,16 @@
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+
+		if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+		    connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+			/* don't try to enable hpd on eDP or LVDS: this avoids
+			 * breaking the aux dp channel on iMacs and helps (but
+			 * does not completely fix)
+			 * https://bugzilla.redhat.com/show_bug.cgi?id=726143
+			 * It also avoids interrupt storms during dpms.
+			 */
+			continue;
+		}
 		switch (radeon_connector->hpd.hpd) {
 		case RADEON_HPD_1:
 			WREG32(DC_HPD1_CONTROL, tmp);
@@ -2050,6 +2271,14 @@
 	}
 	/* enabled rb are just the one not disabled :) */
 	disabled_rb_mask = tmp;
+	tmp = 0;
+	for (i = 0; i < rdev->config.evergreen.max_backends; i++)
+		tmp |= (1 << i);
+	/* if all the backends are disabled, fix it up here */
+	if ((disabled_rb_mask & tmp) == tmp) {
+		for (i = 0; i < rdev->config.evergreen.max_backends; i++)
+			disabled_rb_mask &= ~(1 << i);
+	}
 
 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
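
The fixup above (mirrored for Cayman in ni.c below) guards against a harvest configuration that claims every render backend is disabled: if the disabled mask covers all possible backends, those bits are cleared so the full set stays usable. A compact sketch of the mask logic:

/* Sketch of the render-backend fixup above: if the harvest mask claims
 * every available backend is disabled, clear those bits rather than run
 * with no backends at all.  Illustrative values only. */
#include <stdio.h>

static unsigned fixup_rb_mask(unsigned disabled_rb_mask, int max_backends)
{
	unsigned all = 0;
	int i;

	for (i = 0; i < max_backends; i++)
		all |= 1u << i;
	/* bogus config: everything disabled -> re-enable everything */
	if ((disabled_rb_mask & all) == all)
		disabled_rb_mask &= ~all;
	return disabled_rb_mask;
}

int main(void)
{
	printf("0x%x\n", fixup_rb_mask(0xf, 4)); /* all 4 disabled -> 0x0 */
	printf("0x%x\n", fixup_rb_mask(0x3, 4)); /* partial mask kept  */
	return 0;
}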
@@ -2058,6 +2287,9 @@
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMA_TILING_CONFIG, gb_addr_config);
+	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
+	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
+	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
 
 	if ((rdev->config.evergreen.max_backends == 1) &&
 	    (rdev->flags & RADEON_IS_IGP)) {
@@ -3360,6 +3592,9 @@
 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
 				break;
 			}
+		case 124: /* UVD */
+			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
 			break;
 		case 146:
 		case 147:
@@ -3571,7 +3806,7 @@
 
 static int evergreen_startup(struct radeon_device *rdev)
 {
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	struct radeon_ring *ring;
 	int r;
 
 	/* enable pcie gen2 link */
@@ -3638,6 +3873,17 @@
 		return r;
 	}
 
+	r = rv770_uvd_resume(rdev);
+	if (!r) {
+		r = radeon_fence_driver_start_ring(rdev,
+						   R600_RING_TYPE_UVD_INDEX);
+		if (r)
+			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+	}
+
+	if (r)
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -3647,6 +3893,7 @@
 	}
 	evergreen_irq_set(rdev);
 
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
 			     0, 0xfffff, RADEON_CP_PACKET2);
@@ -3670,6 +3917,19 @@
 	if (r)
 		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	if (ring->ring_size) {
+		r = radeon_ring_init(rdev, ring, ring->ring_size,
+				     R600_WB_UVD_RPTR_OFFSET,
+				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+				     0, 0xfffff, RADEON_CP_PACKET2);
+		if (!r)
+			r = r600_uvd_init(rdev);
+
+		if (r)
+			DRM_ERROR("radeon: error initializing UVD (%d).\n", r);
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3716,8 +3976,10 @@
 int evergreen_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
+	radeon_uvd_suspend(rdev);
 	r700_cp_stop(rdev);
 	r600_dma_stop(rdev);
+	r600_uvd_rbc_stop(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	evergreen_pcie_gart_disable(rdev);
@@ -3797,6 +4059,13 @@
 	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
 
+	r = radeon_uvd_init(rdev);
+	if (!r) {
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
+		r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
+			       4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -3843,6 +4112,7 @@
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	evergreen_pcie_gart_fini(rdev);
+	radeon_uvd_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
 	radeon_fence_driver_fini(rdev);
@@ -3878,7 +4148,7 @@
 	if (!(mask & DRM_PCIE_SPEED_50))
 		return;
 
-	speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 	if (speed_cntl & LC_CURRENT_DATA_RATE) {
 		DRM_INFO("PCIE gen 2 link speeds already enabled\n");
 		return;
@@ -3889,33 +4159,33 @@
 	if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) ||
 	    (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) {
 
-		link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
+		link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
 		link_width_cntl &= ~LC_UPCONFIGURE_DIS;
-		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
 
-		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 		speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
-		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 		speed_cntl |= LC_CLR_FAILED_SPD_CHANGE_CNT;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
-		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 		speed_cntl &= ~LC_CLR_FAILED_SPD_CHANGE_CNT;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
-		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 		speed_cntl |= LC_GEN2_EN_STRAP;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
 	} else {
-		link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
+		link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
 		/* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */
 		if (1)
 			link_width_cntl |= LC_UPCONFIGURE_DIS;
 		else
 			link_width_cntl &= ~LC_UPCONFIGURE_DIS;
-		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
 	}
 }
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 982d25a..eabf92a 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -53,6 +53,43 @@
 #define RCU_IND_INDEX           			0x100
 #define RCU_IND_DATA            			0x104
 
+/* discrete uvd clocks */
+#define CG_UPLL_FUNC_CNTL				0x718
+#	define UPLL_RESET_MASK				0x00000001
+#	define UPLL_SLEEP_MASK				0x00000002
+#	define UPLL_BYPASS_EN_MASK			0x00000004
+#	define UPLL_CTLREQ_MASK				0x00000008
+#	define UPLL_REF_DIV_MASK			0x001F0000
+#	define UPLL_VCO_MODE_MASK			0x00000200
+#	define UPLL_CTLACK_MASK				0x40000000
+#	define UPLL_CTLACK2_MASK			0x80000000
+#define CG_UPLL_FUNC_CNTL_2				0x71c
+#	define UPLL_PDIV_A(x)				((x) << 0)
+#	define UPLL_PDIV_A_MASK				0x0000007F
+#	define UPLL_PDIV_B(x)				((x) << 8)
+#	define UPLL_PDIV_B_MASK				0x00007F00
+#	define VCLK_SRC_SEL(x)				((x) << 20)
+#	define VCLK_SRC_SEL_MASK			0x01F00000
+#	define DCLK_SRC_SEL(x)				((x) << 25)
+#	define DCLK_SRC_SEL_MASK			0x3E000000
+#define CG_UPLL_FUNC_CNTL_3				0x720
+#	define UPLL_FB_DIV(x)				((x) << 0)
+#	define UPLL_FB_DIV_MASK				0x01FFFFFF
+#define CG_UPLL_FUNC_CNTL_4				0x854
+#	define UPLL_SPARE_ISPARE9			0x00020000
+#define CG_UPLL_SPREAD_SPECTRUM				0x79c
+#	define SSEN_MASK				0x00000001
+
+/* fusion uvd clocks */
+#define CG_DCLK_CNTL                                    0x610
+#       define DCLK_DIVIDER_MASK                        0x7f
+#       define DCLK_DIR_CNTL_EN                         (1 << 8)
+#define CG_DCLK_STATUS                                  0x614
+#       define DCLK_STATUS                              (1 << 0)
+#define CG_VCLK_CNTL                                    0x618
+#define CG_VCLK_STATUS                                  0x61c
+#define	CG_SCRATCH1					0x820
+
 #define GRBM_GFX_INDEX          			0x802C
 #define		INSTANCE_INDEX(x)			((x) << 0)
 #define		SE_INDEX(x)     			((x) << 16)
@@ -992,6 +1029,16 @@
 #       define TARGET_LINK_SPEED_MASK                     (0xf << 0)
 #       define SELECTABLE_DEEMPHASIS                      (1 << 6)
 
+
+/*
+ * UVD
+ */
+#define UVD_UDEC_ADDR_CONFIG				0xef4c
+#define UVD_UDEC_DB_ADDR_CONFIG				0xef50
+#define UVD_UDEC_DBW_ADDR_CONFIG			0xef54
+#define UVD_RBC_RB_RPTR					0xf690
+#define UVD_RBC_RB_WPTR					0xf694
+
 /*
  * PM4
  */
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 27769e7..fd03f31 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -615,15 +615,28 @@
 	}
 	/* enabled rbs are just the ones not disabled :) */
 	disabled_rb_mask = tmp;
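+	/* mask of all possible backends; if the harvest config claims
+	   every backend is disabled, it is bogus, so re-enable them all */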
+	tmp = 0;
+	for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
+		tmp |= (1 << i);
+	/* if all the backends are disabled, fix it up here */
+	if ((disabled_rb_mask & tmp) == tmp) {
+		for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
+			disabled_rb_mask &= ~(1 << i);
+	}
 
 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
+	if (ASIC_IS_DCE6(rdev))
+		WREG32(DMIF_ADDR_CALC, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
+	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
+	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
+	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
 
 	if ((rdev->config.cayman.max_backends_per_se == 1) &&
 	    (rdev->flags & RADEON_IS_IGP)) {
@@ -931,6 +944,23 @@
 	radeon_ring_write(ring, 10); /* poll interval */
 }
 
+void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
+			       struct radeon_ring *ring,
+			       struct radeon_semaphore *semaphore,
+			       bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+
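+	/* semaphores are 8-byte aligned: bits 3..22 of the address go in
+	   the low packet, bits 23..42 in the high packet */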
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+	radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
+}
+
 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
 {
 	if (enable)
@@ -1682,6 +1712,16 @@
 		return r;
 	}
 
+	r = rv770_uvd_resume(rdev);
+	if (!r) {
+		r = radeon_fence_driver_start_ring(rdev,
+						   R600_RING_TYPE_UVD_INDEX);
+		if (r)
+			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+	}
+	if (r)
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
 	if (r) {
 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
@@ -1748,6 +1788,18 @@
 	if (r)
 		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	if (ring->ring_size) {
+		r = radeon_ring_init(rdev, ring, ring->ring_size,
+				     R600_WB_UVD_RPTR_OFFSET,
+				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+				     0, 0xfffff, RADEON_CP_PACKET2);
+		if (!r)
+			r = r600_uvd_init(rdev);
+		if (r)
+			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1794,6 +1846,8 @@
 	radeon_vm_manager_fini(rdev);
 	cayman_cp_enable(rdev, false);
 	cayman_dma_stop(rdev);
+	r600_uvd_rbc_stop(rdev);
+	radeon_uvd_suspend(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
@@ -1868,6 +1922,13 @@
 	ring->ring_obj = NULL;
 	r600_ring_init(rdev, ring, 64 * 1024);
 
+	r = radeon_uvd_init(rdev);
+	if (!r) {
+		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+		ring->ring_obj = NULL;
+		r600_ring_init(rdev, ring, 4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1919,6 +1980,7 @@
 	radeon_vm_manager_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
+	radeon_uvd_fini(rdev);
 	cayman_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
@@ -2017,28 +2079,57 @@
 			}
 		}
 	} else {
-		while (count) {
-			ndw = count * 2;
-			if (ndw > 0xFFFFE)
-				ndw = 0xFFFFE;
+		if ((flags & RADEON_VM_PAGE_SYSTEM) ||
+		    (count == 1)) {
+			while (count) {
+				ndw = count * 2;
+				if (ndw > 0xFFFFE)
+					ndw = 0xFFFFE;
 
-			/* for non-physically contiguous pages (system) */
-			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
-			ib->ptr[ib->length_dw++] = pe;
-			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-				if (flags & RADEON_VM_PAGE_SYSTEM) {
-					value = radeon_vm_map_gart(rdev, addr);
-					value &= 0xFFFFFFFFFFFFF000ULL;
-				} else if (flags & RADEON_VM_PAGE_VALID) {
-					value = addr;
-				} else {
-					value = 0;
+				/* for non-physically contiguous pages (system) */
+				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
+				ib->ptr[ib->length_dw++] = pe;
+				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+					if (flags & RADEON_VM_PAGE_SYSTEM) {
+						value = radeon_vm_map_gart(rdev, addr);
+						value &= 0xFFFFFFFFFFFFF000ULL;
+					} else if (flags & RADEON_VM_PAGE_VALID) {
+						value = addr;
+					} else {
+						value = 0;
+					}
+					addr += incr;
+					value |= r600_flags;
+					ib->ptr[ib->length_dw++] = value;
+					ib->ptr[ib->length_dw++] = upper_32_bits(value);
 				}
-				addr += incr;
-				value |= r600_flags;
-				ib->ptr[ib->length_dw++] = value;
+			}
+			while (ib->length_dw & 0x7)
+				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
+		} else {
+			while (count) {
+				ndw = count * 2;
+				if (ndw > 0xFFFFE)
+					ndw = 0xFFFFE;
+
+				if (flags & RADEON_VM_PAGE_VALID)
+					value = addr;
+				else
+					value = 0;
+				/* for physically contiguous pages (vram) */
+				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+				ib->ptr[ib->length_dw++] = pe; /* dst addr */
+				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+				ib->ptr[ib->length_dw++] = 0;
+				ib->ptr[ib->length_dw++] = value; /* value */
 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
+				ib->ptr[ib->length_dw++] = incr; /* increment size */
+				ib->ptr[ib->length_dw++] = 0;
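+				/* one packet generates ndw/2 PTEs of 8 bytes
+				   each, advance dst, src and count by that */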
+				pe += ndw * 4;
+				addr += (ndw / 2) * incr;
+				count -= ndw / 2;
 			}
 		}
 		while (ib->length_dw & 0x7)
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 079dee2..e226faf 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -45,6 +45,10 @@
 #define ARUBA_GB_ADDR_CONFIG_GOLDEN        0x12010001
 
 #define DMIF_ADDR_CONFIG  				0xBD4
+
+/* DCE6 only */
+#define DMIF_ADDR_CALC  				0xC00
+
 #define	SRBM_GFX_CNTL				        0x0E44
 #define		RINGID(x)					(((x) & 0x3) << 0)
 #define		VMID(x)						(((x) & 0x7) << 0)
@@ -486,6 +490,18 @@
 #       define CACHE_FLUSH_AND_INV_EVENT                        (0x16 << 0)
 
 /*
+ * UVD
+ */
+#define UVD_SEMA_ADDR_LOW				0xEF00
+#define UVD_SEMA_ADDR_HIGH				0xEF04
+#define UVD_SEMA_CMD					0xEF08
+#define UVD_UDEC_ADDR_CONFIG				0xEF4C
+#define UVD_UDEC_DB_ADDR_CONFIG				0xEF50
+#define UVD_UDEC_DBW_ADDR_CONFIG			0xEF54
+#define UVD_RBC_RB_RPTR					0xF690
+#define UVD_RBC_RB_WPTR					0xF694
+
+/*
  * PM4
  */
 #define PACKET0(reg, n)	((RADEON_PACKET_TYPE0 << 30) |			\
@@ -668,6 +684,11 @@
 					 (((vmid) & 0xF) << 20) |	\
 					 (((n) & 0xFFFFF) << 0))
 
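+/* special write packet the VM code uses to generate PTEs/PDEs; n = dwords */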
+#define DMA_PTE_PDE_PACKET(n)		((2 << 28) |			\
+					 (1 << 26) |			\
+					 (1 << 21) |			\
+					 (((n) & 0xFFFFF) << 0))
+
 /* async DMA Packet types */
 #define	DMA_PACKET_WRITE				  0x2
 #define	DMA_PACKET_COPY					  0x3
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 0740db3..5fe9e74 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1145,7 +1145,7 @@
 	}
 	if (rdev->flags & RADEON_IS_AGP) {
 		size_bf = mc->gtt_start;
-		size_af = 0xFFFFFFFF - mc->gtt_end;
+		size_af = mc->mc_mask - mc->gtt_end;
 		if (size_bf > size_af) {
 			if (mc->mc_vram_size > size_bf) {
 				dev_warn(rdev->dev, "limiting VRAM\n");
@@ -2552,6 +2552,185 @@
 }
 
 /*
+ * UVD
+ */
+int r600_uvd_rbc_start(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	uint64_t rptr_addr;
+	uint32_t rb_bufsz, tmp;
+	int r;
+
+	rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET;
+
+	if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) {
+		DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n");
+		return -EINVAL;
+	}
+
+	/* force RBC into idle state */
+	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+
+	/* Set the write pointer delay */
+	WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
+
+	/* set the wb address */
+	WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2);
+
+	/* program the 4GB memory segment for rptr and ring buffer */
+	WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) |
+				   (0x7 << 16) | (0x1 << 31));
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(UVD_RBC_RB_RPTR, 0x0);
+
+	ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
+	WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+
+	/* set the ring address */
+	WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
+
+	/* Set ring buffer size */
+	rb_bufsz = drm_order(ring->ring_size);
+	rb_bufsz = (0x1 << 8) | rb_bufsz;
+	WREG32(UVD_RBC_RB_CNTL, rb_bufsz);
+
+	ring->ready = true;
+	r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
+	if (r) {
+		ring->ready = false;
+		return r;
+	}
+
+	r = radeon_ring_lock(rdev, ring, 10);
+	if (r) {
+		DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
+		return r;
+	}
+
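+	/* program generous timeouts for semaphore waits and signals */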
+	tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+	radeon_ring_write(ring, tmp);
+	radeon_ring_write(ring, 0xFFFFF);
+
+	tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+	radeon_ring_write(ring, tmp);
+	radeon_ring_write(ring, 0xFFFFF);
+
+	tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+	radeon_ring_write(ring, tmp);
+	radeon_ring_write(ring, 0xFFFFF);
+
+	/* Clear timeout status bits */
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
+	radeon_ring_write(ring, 0x8);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
+	radeon_ring_write(ring, 1);
+
+	radeon_ring_unlock_commit(rdev, ring);
+
+	return 0;
+}
+
+void r600_uvd_rbc_stop(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+
+	/* force RBC into idle state */
+	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+	ring->ready = false;
+}
+
+int r600_uvd_init(struct radeon_device *rdev)
+{
+	int i, j, r;
+
+	/* disable clock gating */
+	WREG32(UVD_CGC_GATE, 0);
+
+	/* disable interrupts */
+	WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
+
+	/* put LMI, VCPU, RBC etc... into reset */
+	WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
+	       LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
+	       CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
+	mdelay(5);
+
+	/* take UVD block out of reset */
+	WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
+	mdelay(5);
+
+	/* initialize UVD memory controller */
+	WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+			     (1 << 21) | (1 << 9) | (1 << 20));
+
+	/* disable byte swapping */
+	WREG32(UVD_LMI_SWAP_CNTL, 0);
+	WREG32(UVD_MP_SWAP_CNTL, 0);
+
+	WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
+	WREG32(UVD_MPC_SET_MUXA1, 0x0);
+	WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
+	WREG32(UVD_MPC_SET_MUXB1, 0x0);
+	WREG32(UVD_MPC_SET_ALU, 0);
+	WREG32(UVD_MPC_SET_MUX, 0x88);
+
+	/* Stall UMC */
+	WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+	WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+
+	/* take all subblocks out of reset, except VCPU */
+	WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+	mdelay(5);
+
+	/* enable VCPU clock */
+	WREG32(UVD_VCPU_CNTL,  1 << 9);
+
+	/* enable UMC */
+	WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+
+	/* boot up the VCPU */
+	WREG32(UVD_SOFT_RESET, 0);
+	mdelay(10);
+
+	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+
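+	/* the VCPU may take a while to boot: poll UVD_STATUS for the
+	   running bit (bit 1) and retry with a VCPU reset up to ten times */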
+	for (i = 0; i < 10; ++i) {
+		uint32_t status;
+		for (j = 0; j < 100; ++j) {
+			status = RREG32(UVD_STATUS);
+			if (status & 2)
+				break;
+			mdelay(10);
+		}
+		r = 0;
+		if (status & 2)
+			break;
+
+		DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+		WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
+		mdelay(10);
+		WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
+		mdelay(10);
+		r = -1;
+	}
+	if (r) {
+		DRM_ERROR("UVD not responding, giving up!!!\n");
+		return r;
+	}
+	/* enable interrupts */
+	WREG32_P(UVD_MASTINT_EN, 3 << 1, ~(3 << 1));
+
+	r = r600_uvd_rbc_start(rdev);
+	if (r)
+		return r;
+
+	DRM_INFO("UVD initialized successfully.\n");
+	return 0;
+}
+
+/*
  * GPU scratch registers helpers function.
  */
 void r600_scratch_init(struct radeon_device *rdev)
@@ -2660,6 +2839,40 @@
 	return r;
 }
 
+int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
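+	/* UVD has no scratch registers, so use UVD_CONTEXT_ID as one:
+	   seed it, write a token through the ring and poll it back */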
+	WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
+	r = radeon_ring_lock(rdev, ring, 3);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
+			  ring->idx, r);
+		return r;
+	}
+	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(UVD_CONTEXT_ID);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test on %d succeeded in %d usecs\n",
+			 ring->idx, i);
+	} else {
+		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+			  ring->idx, tmp);
+		r = -EINVAL;
+	}
+	return r;
+}
+
 /*
  * CP fences/semaphores
  */
@@ -2711,6 +2924,30 @@
 	}
 }
 
+void r600_uvd_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
+
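+	/* cmd 0 writes the fence value to the given address; the second
+	   sequence (cmd 2) appears to act as a trap, raising the UVD
+	   interrupt to signal completion */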
+	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 2);
+}
+
 void r600_semaphore_ring_emit(struct radeon_device *rdev,
 			      struct radeon_ring *ring,
 			      struct radeon_semaphore *semaphore,
@@ -2780,6 +3017,23 @@
 	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
 }
 
+void r600_uvd_semaphore_emit(struct radeon_device *rdev,
+			     struct radeon_ring *ring,
+			     struct radeon_semaphore *semaphore,
+			     bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+
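+	/* same 20-bit low/high address split as the cayman variant, just
+	   without the 0x80 flag in the semaphore command */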
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+	radeon_ring_write(ring, emit_wait ? 1 : 0);
+}
+
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
@@ -3183,6 +3437,16 @@
 	radeon_ring_write(ring, ib->length_dw);
 }
 
+void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
+	radeon_ring_write(ring, ib->gpu_addr);
+	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
+	radeon_ring_write(ring, ib->length_dw);
+}
+
 int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	struct radeon_ib ib;
@@ -3300,6 +3564,33 @@
 	return r;
 }
 
+int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	struct radeon_fence *fence;
+	int r;
+
+	r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+	if (r) {
+		DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
+	if (r) {
+		DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_wait(fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		return r;
+	}
+	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+	radeon_fence_unref(&fence);
+	return r;
+}
+
 /**
  * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
  *
@@ -4232,7 +4523,7 @@
 
 void r600_set_pcie_lanes(struct radeon_device *rdev, int lanes)
 {
-	u32 link_width_cntl, mask, target_reg;
+	u32 link_width_cntl, mask;
 
 	if (rdev->flags & RADEON_IS_IGP)
 		return;
@@ -4244,7 +4535,7 @@
 	if (ASIC_IS_X2(rdev))
 		return;
 
-	/* FIXME wait for idle */
+	radeon_gui_idle(rdev);
 
 	switch (lanes) {
 	case 0:
@@ -4263,53 +4554,24 @@
 		mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
 		break;
 	case 12:
+		/* not actually supported */
 		mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
 		break;
 	case 16:
-	default:
 		mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
 		break;
+	default:
+		DRM_ERROR("invalid pcie lane request: %d\n", lanes);
+		return;
 	}
 
-	link_width_cntl = RREG32_PCIE_P(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
+	link_width_cntl = RREG32_PCIE_PORT(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
+	link_width_cntl &= ~RADEON_PCIE_LC_LINK_WIDTH_MASK;
+	link_width_cntl |= mask << RADEON_PCIE_LC_LINK_WIDTH_SHIFT;
+	link_width_cntl |= (RADEON_PCIE_LC_RECONFIG_NOW |
+			    R600_PCIE_LC_RECONFIG_ARC_MISSING_ESCAPE);
 
-	if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
-	    (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
-		return;
-
-	if (link_width_cntl & R600_PCIE_LC_UPCONFIGURE_DIS)
-		return;
-
-	link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
-			     RADEON_PCIE_LC_RECONFIG_NOW |
-			     R600_PCIE_LC_RENEGOTIATE_EN |
-			     R600_PCIE_LC_RECONFIG_ARC_MISSING_ESCAPE);
-	link_width_cntl |= mask;
-
-	WREG32_PCIE_P(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
-
-        /* some northbridges can renegotiate the link rather than requiring                                  
-         * a complete re-config.                                                                             
-         * e.g., AMD 780/790 northbridges (pci ids: 0x5956, 0x5957, 0x5958, etc.)                            
-         */
-        if (link_width_cntl & R600_PCIE_LC_RENEGOTIATION_SUPPORT)
-		link_width_cntl |= R600_PCIE_LC_RENEGOTIATE_EN | R600_PCIE_LC_UPCONFIGURE_SUPPORT;
-        else
-		link_width_cntl |= R600_PCIE_LC_RECONFIG_ARC_MISSING_ESCAPE;
-
-	WREG32_PCIE_P(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
-						       RADEON_PCIE_LC_RECONFIG_NOW));
-
-        if (rdev->family >= CHIP_RV770)
-		target_reg = R700_TARGET_AND_CURRENT_PROFILE_INDEX;
-        else
-		target_reg = R600_TARGET_AND_CURRENT_PROFILE_INDEX;
-
-        /* wait for lane set to complete */
-        link_width_cntl = RREG32(target_reg);
-        while (link_width_cntl == 0xffffffff)
-		link_width_cntl = RREG32(target_reg);
-
+	WREG32_PCIE_PORT(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
 }
 
 int r600_get_pcie_lanes(struct radeon_device *rdev)
@@ -4326,13 +4588,11 @@
 	if (ASIC_IS_X2(rdev))
 		return 0;
 
-	/* FIXME wait for idle */
+	radeon_gui_idle(rdev);
 
-	link_width_cntl = RREG32_PCIE_P(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
+	link_width_cntl = RREG32_PCIE_PORT(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
 
 	switch ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) >> RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT) {
-	case RADEON_PCIE_LC_LINK_WIDTH_X0:
-		return 0;
 	case RADEON_PCIE_LC_LINK_WIDTH_X1:
 		return 1;
 	case RADEON_PCIE_LC_LINK_WIDTH_X2:
@@ -4341,6 +4601,10 @@
 		return 4;
 	case RADEON_PCIE_LC_LINK_WIDTH_X8:
 		return 8;
+	case RADEON_PCIE_LC_LINK_WIDTH_X12:
+		/* not actually supported */
+		return 12;
+	case RADEON_PCIE_LC_LINK_WIDTH_X0:
 	case RADEON_PCIE_LC_LINK_WIDTH_X16:
 	default:
 		return 16;
@@ -4378,7 +4642,7 @@
 	if (!(mask & DRM_PCIE_SPEED_50))
 		return;
 
-	speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 	if (speed_cntl & LC_CURRENT_DATA_RATE) {
 		DRM_INFO("PCIE gen 2 link speeds already enabled\n");
 		return;
@@ -4391,23 +4655,23 @@
 	    (rdev->family == CHIP_RV620) ||
 	    (rdev->family == CHIP_RV635)) {
 		/* advertise upconfig capability */
-		link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
+		link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
 		link_width_cntl &= ~LC_UPCONFIGURE_DIS;
-		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
-		link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
+		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+		link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
 		if (link_width_cntl & LC_RENEGOTIATION_SUPPORT) {
 			lanes = (link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT;
 			link_width_cntl &= ~(LC_LINK_WIDTH_MASK |
 					     LC_RECONFIG_ARC_MISSING_ESCAPE);
 			link_width_cntl |= lanes | LC_RECONFIG_NOW | LC_RENEGOTIATE_EN;
-			WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+			WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
 		} else {
 			link_width_cntl |= LC_UPCONFIGURE_DIS;
-			WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+			WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
 		}
 	}
 
-	speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 	if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
 	    (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) {
 
@@ -4428,7 +4692,7 @@
 		speed_cntl &= ~LC_VOLTAGE_TIMER_SEL_MASK;
 		speed_cntl &= ~LC_FORCE_DIS_HW_SPEED_CHANGE;
 		speed_cntl |= LC_FORCE_EN_HW_SPEED_CHANGE;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
 		tmp = RREG32(0x541c);
 		WREG32(0x541c, tmp | 0x8);
@@ -4442,27 +4706,27 @@
 		if ((rdev->family == CHIP_RV670) ||
 		    (rdev->family == CHIP_RV620) ||
 		    (rdev->family == CHIP_RV635)) {
-			training_cntl = RREG32_PCIE_P(PCIE_LC_TRAINING_CNTL);
+			training_cntl = RREG32_PCIE_PORT(PCIE_LC_TRAINING_CNTL);
 			training_cntl &= ~LC_POINT_7_PLUS_EN;
-			WREG32_PCIE_P(PCIE_LC_TRAINING_CNTL, training_cntl);
+			WREG32_PCIE_PORT(PCIE_LC_TRAINING_CNTL, training_cntl);
 		} else {
-			speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+			speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 			speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN;
-			WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+			WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 		}
 
-		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 		speed_cntl |= LC_GEN2_EN_STRAP;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
 	} else {
-		link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
+		link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
 		/* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */
 		if (1)
 			link_width_cntl |= LC_UPCONFIGURE_DIS;
 		else
 			link_width_cntl &= ~LC_UPCONFIGURE_DIS;
-		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
 	}
 }
 
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index a42ba11..441bdb8 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -691,6 +691,7 @@
 #define SRBM_SOFT_RESET                                   0xe60
 #       define SOFT_RESET_DMA                             (1 << 12)
 #       define SOFT_RESET_RLC                             (1 << 13)
+#       define SOFT_RESET_UVD                             (1 << 18)
 #       define RV770_SOFT_RESET_DMA                       (1 << 20)
 
 #define CP_INT_CNTL                                       0xc124
@@ -1143,6 +1144,66 @@
 #       define AFMT_AZ_AUDIO_ENABLE_CHG_ACK  (1 << 30)
 
 /*
+ * UVD
+ */
+#define UVD_SEMA_ADDR_LOW				0xef00
+#define UVD_SEMA_ADDR_HIGH				0xef04
+#define UVD_SEMA_CMD					0xef08
+
+#define UVD_GPCOM_VCPU_CMD				0xef0c
+#define UVD_GPCOM_VCPU_DATA0				0xef10
+#define UVD_GPCOM_VCPU_DATA1				0xef14
+#define UVD_ENGINE_CNTL					0xef18
+
+#define UVD_SEMA_CNTL					0xf400
+#define UVD_RB_ARB_CTRL					0xf480
+
+#define UVD_LMI_EXT40_ADDR				0xf498
+#define UVD_CGC_GATE					0xf4a8
+#define UVD_LMI_CTRL2					0xf4f4
+#define UVD_MASTINT_EN					0xf500
+#define UVD_LMI_ADDR_EXT				0xf594
+#define UVD_LMI_CTRL					0xf598
+#define UVD_LMI_SWAP_CNTL				0xf5b4
+#define UVD_MP_SWAP_CNTL				0xf5bc
+#define UVD_MPC_CNTL					0xf5dc
+#define UVD_MPC_SET_MUXA0				0xf5e4
+#define UVD_MPC_SET_MUXA1				0xf5e8
+#define UVD_MPC_SET_MUXB0				0xf5ec
+#define UVD_MPC_SET_MUXB1				0xf5f0
+#define UVD_MPC_SET_MUX					0xf5f4
+#define UVD_MPC_SET_ALU					0xf5f8
+
+#define UVD_VCPU_CNTL					0xf660
+#define UVD_SOFT_RESET					0xf680
+#define		RBC_SOFT_RESET					(1<<0)
+#define		LBSI_SOFT_RESET					(1<<1)
+#define		LMI_SOFT_RESET					(1<<2)
+#define		VCPU_SOFT_RESET					(1<<3)
+#define		CSM_SOFT_RESET					(1<<5)
+#define		CXW_SOFT_RESET					(1<<6)
+#define		TAP_SOFT_RESET					(1<<7)
+#define		LMI_UMC_SOFT_RESET				(1<<13)
+#define UVD_RBC_IB_BASE					0xf684
+#define UVD_RBC_IB_SIZE					0xf688
+#define UVD_RBC_RB_BASE					0xf68c
+#define UVD_RBC_RB_RPTR					0xf690
+#define UVD_RBC_RB_WPTR					0xf694
+#define UVD_RBC_RB_WPTR_CNTL				0xf698
+
+#define UVD_STATUS					0xf6bc
+
+#define UVD_SEMA_TIMEOUT_STATUS				0xf6c0
+#define UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL		0xf6c4
+#define UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL		0xf6c8
+#define UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL		0xf6cc
+
+#define UVD_RBC_RB_CNTL					0xf6a4
+#define UVD_RBC_RB_RPTR_ADDR				0xf6a8
+
+#define UVD_CONTEXT_ID					0xf6f4
+
+/*
  * PM4
  */
 #define PACKET0(reg, n)	((RADEON_PACKET_TYPE0 << 30) |			\
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8263af3f..18904fb 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -95,6 +95,7 @@
 extern int radeon_pcie_gen2;
 extern int radeon_msi;
 extern int radeon_lockup_timeout;
+extern int radeon_fastfb;
 
 /*
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
@@ -109,24 +110,27 @@
 #define RADEON_BIOS_NUM_SCRATCH			8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS			5
+#define RADEON_NUM_RINGS			6
 
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ		0LL
 
 /* internal ring indices */
 /* r1xx+ has gfx CP ring */
-#define RADEON_RING_TYPE_GFX_INDEX		0
+#define RADEON_RING_TYPE_GFX_INDEX	0
 
 /* cayman has 2 compute CP rings */
-#define CAYMAN_RING_TYPE_CP1_INDEX		1
-#define CAYMAN_RING_TYPE_CP2_INDEX		2
+#define CAYMAN_RING_TYPE_CP1_INDEX	1
+#define CAYMAN_RING_TYPE_CP2_INDEX	2
 
 /* R600+ has an async dma ring */
 #define R600_RING_TYPE_DMA_INDEX		3
 /* cayman add a second async dma ring */
 #define CAYMAN_RING_TYPE_DMA1_INDEX		4
 
+/* R600+ */
+#define R600_RING_TYPE_UVD_INDEX	5
+
 /* hardcode those limits for now */
 #define RADEON_VA_IB_OFFSET			(1 << 20)
 #define RADEON_VA_RESERVED_SIZE			(8 << 20)
@@ -202,6 +206,11 @@
 void radeon_pm_resume(struct radeon_device *rdev);
 void radeon_combios_get_power_modes(struct radeon_device *rdev);
 void radeon_atombios_get_power_modes(struct radeon_device *rdev);
+int radeon_atom_get_clock_dividers(struct radeon_device *rdev,
+				   u8 clock_type,
+				   u32 clock,
+				   bool strobe_mode,
+				   struct atom_clock_dividers *dividers);
 void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type);
 void rs690_pm_info(struct radeon_device *rdev);
 extern int rv6xx_get_temp(struct radeon_device *rdev);
@@ -357,8 +366,9 @@
 	struct ttm_validate_buffer tv;
 	struct radeon_bo	*bo;
 	uint64_t		gpu_offset;
-	unsigned		rdomain;
-	unsigned		wdomain;
+	bool			written;
+	unsigned		domain;
+	unsigned		alt_domain;
 	u32			tiling_flags;
 };
 
@@ -517,6 +527,7 @@
 	bool			vram_is_ddr;
 	bool			igp_sideport_enabled;
 	u64                     gtt_base_align;
+	u64                     mc_mask;
 };
 
 bool radeon_combios_sideport_present(struct radeon_device *rdev);
@@ -918,6 +929,7 @@
 #define R600_WB_DMA_RPTR_OFFSET   1792
 #define R600_WB_IH_WPTR_OFFSET   2048
 #define CAYMAN_WB_DMA1_RPTR_OFFSET   2304
+#define R600_WB_UVD_RPTR_OFFSET  2560
 #define R600_WB_EVENT_OFFSET     3072
 
 /**
@@ -1118,6 +1130,33 @@
 int radeon_pm_get_type_index(struct radeon_device *rdev,
 			     enum radeon_pm_state_type ps_type,
 			     int instance);
+/*
+ * UVD
+ */
+#define RADEON_MAX_UVD_HANDLES	10
+#define RADEON_UVD_STACK_SIZE	(1024*1024)
+#define RADEON_UVD_HEAP_SIZE	(1024*1024)
+
+struct radeon_uvd {
+	struct radeon_bo	*vcpu_bo;
+	void			*cpu_addr;
+	uint64_t		gpu_addr;
+	atomic_t		handles[RADEON_MAX_UVD_HANDLES];
+	struct drm_file		*filp[RADEON_MAX_UVD_HANDLES];
+};
+
+int radeon_uvd_init(struct radeon_device *rdev);
+void radeon_uvd_fini(struct radeon_device *rdev);
+int radeon_uvd_suspend(struct radeon_device *rdev);
+int radeon_uvd_resume(struct radeon_device *rdev);
+int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
+			      uint32_t handle, struct radeon_fence **fence);
+int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
+			       uint32_t handle, struct radeon_fence **fence);
+void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo);
+void radeon_uvd_free_handles(struct radeon_device *rdev,
+			     struct drm_file *filp);
+int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
 
 struct r600_audio {
 	int			channels;
@@ -1281,6 +1320,7 @@
 		int (*get_pcie_lanes)(struct radeon_device *rdev);
 		void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
 		void (*set_clock_gating)(struct radeon_device *rdev, int enable);
+		int (*set_uvd_clocks)(struct radeon_device *rdev, u32 vclk, u32 dclk);
 	} pm;
 	/* pageflipping */
 	struct {
@@ -1443,6 +1483,7 @@
 	unsigned multi_gpu_tile_size;
 
 	unsigned tile_config;
+	uint32_t tile_mode_array[32];
 };
 
 union radeon_asic_config {
@@ -1608,6 +1649,7 @@
 	struct radeon_asic		*asic;
 	struct radeon_gem		gem;
 	struct radeon_pm		pm;
+	struct radeon_uvd		uvd;
 	uint32_t			bios_scratch[RADEON_BIOS_NUM_SCRATCH];
 	struct radeon_wb		wb;
 	struct radeon_dummy_page	dummy_page;
@@ -1615,12 +1657,14 @@
 	bool				suspend;
 	bool				need_dma32;
 	bool				accel_working;
+	bool				fastfb_working; /* IGP feature*/
 	struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
 	const struct firmware *me_fw;	/* all family ME firmware */
 	const struct firmware *pfp_fw;	/* r6/700 PFP firmware */
 	const struct firmware *rlc_fw;	/* r6/700 RLC firmware */
 	const struct firmware *mc_fw;	/* NI MC firmware */
 	const struct firmware *ce_fw;	/* SI CE firmware */
+	const struct firmware *uvd_fw;	/* UVD firmware */
 	struct r600_blit r600_blit;
 	struct r600_vram_scratch vram_scratch;
 	int msi_enabled; /* msi enabled */
@@ -1688,8 +1732,8 @@
 #define WREG32_MC(reg, v) rdev->mc_wreg(rdev, (reg), (v))
 #define RREG32_PCIE(reg) rv370_pcie_rreg(rdev, (reg))
 #define WREG32_PCIE(reg, v) rv370_pcie_wreg(rdev, (reg), (v))
-#define RREG32_PCIE_P(reg) rdev->pciep_rreg(rdev, (reg))
-#define WREG32_PCIE_P(reg, v) rdev->pciep_wreg(rdev, (reg), (v))
+#define RREG32_PCIE_PORT(reg) rdev->pciep_rreg(rdev, (reg))
+#define WREG32_PCIE_PORT(reg, v) rdev->pciep_wreg(rdev, (reg), (v))
 #define WREG32_P(reg, val, mask)				\
 	do {							\
 		uint32_t tmp_ = RREG32(reg);			\
@@ -1845,6 +1889,7 @@
 #define radeon_get_pcie_lanes(rdev) (rdev)->asic->pm.get_pcie_lanes((rdev))
 #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->pm.set_pcie_lanes((rdev), (l))
 #define radeon_set_clock_gating(rdev, e) (rdev)->asic->pm.set_clock_gating((rdev), (e))
+#define radeon_set_uvd_clocks(rdev, v, d) (rdev)->asic->pm.set_uvd_clocks((rdev), (v), (d))
 #define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s)))
 #define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->surface.clear_reg((rdev), (r)))
 #define radeon_bandwidth_update(rdev) (rdev)->asic->display.bandwidth_update((rdev))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index aba0a89..48d0200 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1130,6 +1130,15 @@
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &r600_dma_is_lockup,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &r600_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1174,6 +1183,7 @@
 		.get_pcie_lanes = &r600_get_pcie_lanes,
 		.set_pcie_lanes = &r600_set_pcie_lanes,
 		.set_clock_gating = &radeon_atom_set_clock_gating,
+		.set_uvd_clocks = &rv770_set_uvd_clocks,
 	},
 	.pflip = {
 		.pre_page_flip = &rs600_pre_page_flip,
@@ -1216,6 +1226,15 @@
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &evergreen_dma_is_lockup,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &r600_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1260,6 +1279,7 @@
 		.get_pcie_lanes = &r600_get_pcie_lanes,
 		.set_pcie_lanes = &r600_set_pcie_lanes,
 		.set_clock_gating = NULL,
+		.set_uvd_clocks = &evergreen_set_uvd_clocks,
 	},
 	.pflip = {
 		.pre_page_flip = &evergreen_pre_page_flip,
@@ -1302,6 +1322,15 @@
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &evergreen_dma_is_lockup,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &r600_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1346,6 +1375,7 @@
 		.get_pcie_lanes = NULL,
 		.set_pcie_lanes = NULL,
 		.set_clock_gating = NULL,
+		.set_uvd_clocks = &sumo_set_uvd_clocks,
 	},
 	.pflip = {
 		.pre_page_flip = &evergreen_pre_page_flip,
@@ -1388,6 +1418,15 @@
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &evergreen_dma_is_lockup,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &r600_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1429,9 +1468,10 @@
 		.set_engine_clock = &radeon_atom_set_engine_clock,
 		.get_memory_clock = &radeon_atom_get_memory_clock,
 		.set_memory_clock = &radeon_atom_set_memory_clock,
-		.get_pcie_lanes = NULL,
-		.set_pcie_lanes = NULL,
+		.get_pcie_lanes = &r600_get_pcie_lanes,
+		.set_pcie_lanes = &r600_set_pcie_lanes,
 		.set_clock_gating = NULL,
+		.set_uvd_clocks = &evergreen_set_uvd_clocks,
 	},
 	.pflip = {
 		.pre_page_flip = &evergreen_pre_page_flip,
@@ -1517,6 +1557,15 @@
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &cayman_dma_is_lockup,
 			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &cayman_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1558,9 +1607,10 @@
 		.set_engine_clock = &radeon_atom_set_engine_clock,
 		.get_memory_clock = &radeon_atom_get_memory_clock,
 		.set_memory_clock = &radeon_atom_set_memory_clock,
-		.get_pcie_lanes = NULL,
-		.set_pcie_lanes = NULL,
+		.get_pcie_lanes = &r600_get_pcie_lanes,
+		.set_pcie_lanes = &r600_set_pcie_lanes,
 		.set_clock_gating = NULL,
+		.set_uvd_clocks = &evergreen_set_uvd_clocks,
 	},
 	.pflip = {
 		.pre_page_flip = &evergreen_pre_page_flip,
@@ -1646,6 +1696,15 @@
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &cayman_dma_is_lockup,
 			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &cayman_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1690,6 +1749,7 @@
 		.get_pcie_lanes = NULL,
 		.set_pcie_lanes = NULL,
 		.set_clock_gating = NULL,
+		.set_uvd_clocks = &sumo_set_uvd_clocks,
 	},
 	.pflip = {
 		.pre_page_flip = &evergreen_pre_page_flip,
@@ -1775,6 +1835,15 @@
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &si_dma_is_lockup,
 			.vm_flush = &si_dma_vm_flush,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &cayman_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1816,9 +1885,10 @@
 		.set_engine_clock = &radeon_atom_set_engine_clock,
 		.get_memory_clock = &radeon_atom_get_memory_clock,
 		.set_memory_clock = &radeon_atom_set_memory_clock,
-		.get_pcie_lanes = NULL,
-		.set_pcie_lanes = NULL,
+		.get_pcie_lanes = &r600_get_pcie_lanes,
+		.set_pcie_lanes = &r600_set_pcie_lanes,
 		.set_clock_gating = NULL,
+		.set_uvd_clocks = &si_set_uvd_clocks,
 	},
 	.pflip = {
 		.pre_page_flip = &evergreen_pre_page_flip,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 3535f73..2add526 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -330,6 +330,7 @@
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_gpu_pages, struct radeon_fence **fence);
@@ -392,6 +393,19 @@
 u32 r600_get_xclk(struct radeon_device *rdev);
 uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev);
 
+/* uvd */
+int r600_uvd_init(struct radeon_device *rdev);
+int r600_uvd_rbc_start(struct radeon_device *rdev);
+void r600_uvd_rbc_stop(struct radeon_device *rdev);
+int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+void r600_uvd_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence);
+void r600_uvd_semaphore_emit(struct radeon_device *rdev,
+			     struct radeon_ring *ring,
+			     struct radeon_semaphore *semaphore,
+			     bool emit_wait);
+void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+
 /*
  * rv770,rv730,rv710,rv740
  */
@@ -409,6 +423,8 @@
 		  unsigned num_gpu_pages,
 		   struct radeon_fence **fence);
 u32 rv770_get_xclk(struct radeon_device *rdev);
+int rv770_uvd_resume(struct radeon_device *rdev);
+int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
 
 /*
  * evergreen
@@ -444,6 +460,8 @@
 extern void evergreen_pm_finish(struct radeon_device *rdev);
 extern void sumo_pm_init_profile(struct radeon_device *rdev);
 extern void btc_pm_init_profile(struct radeon_device *rdev);
+int sumo_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
+int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
 extern void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc);
 extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
 extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc);
@@ -465,6 +483,10 @@
  */
 void cayman_fence_ring_emit(struct radeon_device *rdev,
 			    struct radeon_fence *fence);
+void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
+			       struct radeon_ring *ring,
+			       struct radeon_semaphore *semaphore,
+			       bool emit_wait);
 void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
 int cayman_init(struct radeon_device *rdev);
 void cayman_fini(struct radeon_device *rdev);
@@ -524,5 +546,6 @@
 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 u32 si_get_xclk(struct radeon_device *rdev);
 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev);
+int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index f22eb57..8c1779c 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2654,6 +2654,113 @@
 		rdev->pm.current_vddc = 0;
 }
 
+union get_clock_dividers {
+	struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS v1;
+	struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2 v2;
+	struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3 v3;
+	struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 v4;
+	struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V5 v5;
+};
+
+int radeon_atom_get_clock_dividers(struct radeon_device *rdev,
+				   u8 clock_type,
+				   u32 clock,
+				   bool strobe_mode,
+				   struct atom_clock_dividers *dividers)
+{
+	union get_clock_dividers args;
+	int index = GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL);
+	u8 frev, crev;
+
+	memset(&args, 0, sizeof(args));
+	memset(dividers, 0, sizeof(struct atom_clock_dividers));
+
+	if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev))
+		return -EINVAL;
+
+	switch (crev) {
+	case 1:
+		/* r4xx, r5xx */
+		args.v1.ucAction = clock_type;
+		args.v1.ulClock = cpu_to_le32(clock);	/* 10 khz */
+
+		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+		dividers->post_div = args.v1.ucPostDiv;
+		dividers->fb_div = args.v1.ucFbDiv;
+		dividers->enable_post_div = true;
+		break;
+	case 2:
+	case 3:
+		/* r6xx, r7xx, evergreen, ni */
+		if (rdev->family <= CHIP_RV770) {
+			args.v2.ucAction = clock_type;
+			args.v2.ulClock = cpu_to_le32(clock);	/* 10 khz */
+
+			atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+			dividers->post_div = args.v2.ucPostDiv;
+			dividers->fb_div = le16_to_cpu(args.v2.usFbDiv);
+			dividers->ref_div = args.v2.ucAction;
+			if (rdev->family == CHIP_RV770) {
+				dividers->enable_post_div = (le32_to_cpu(args.v2.ulClock) & (1 << 24)) ?
+					true : false;
+				dividers->vco_mode = (le32_to_cpu(args.v2.ulClock) & (1 << 25)) ? 1 : 0;
+			} else
+				dividers->enable_post_div = (dividers->fb_div & 1) ? true : false;
+		} else {
+			if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
+				args.v3.ulClock.ulComputeClockFlag = clock_type;
+				args.v3.ulClock.ulClockFreq = cpu_to_le32(clock);	/* 10 khz */
+
+				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+				dividers->post_div = args.v3.ucPostDiv;
+				dividers->enable_post_div = (args.v3.ucCntlFlag &
+							     ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+				dividers->enable_dithen = (args.v3.ucCntlFlag &
+							   ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+				dividers->fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
+				dividers->frac_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDivFrac);
+				dividers->ref_div = args.v3.ucRefDiv;
+				dividers->vco_mode = (args.v3.ucCntlFlag &
+						      ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+			} else {
+				args.v5.ulClock.ulComputeClockFlag = clock_type;
+				args.v5.ulClock.ulClockFreq = cpu_to_le32(clock);	/* 10 khz */
+				if (strobe_mode)
+					args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
+
+				atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+				dividers->post_div = args.v5.ucPostDiv;
+				dividers->enable_post_div = (args.v5.ucCntlFlag &
+							     ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+				dividers->enable_dithen = (args.v5.ucCntlFlag &
+							   ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+				dividers->whole_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDiv);
+				dividers->frac_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDivFrac);
+				dividers->ref_div = args.v5.ucRefDiv;
+				dividers->vco_mode = (args.v5.ucCntlFlag &
+						      ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+			}
+		}
+		break;
+	case 4:
+		/* fusion */
+		args.v4.ulClock = cpu_to_le32(clock);	/* 10 khz */
+
+		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+		dividers->post_div = args.v4.ucPostDiv;
+		dividers->real_clock = le32_to_cpu(args.v4.ulClock);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable)
 {
 	DYNAMIC_CLOCK_GATING_PS_ALLOCATION args;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 70d3824..c740707 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -63,30 +63,50 @@
 				break;
 			}
 		}
-		if (!duplicate) {
-			p->relocs[i].gobj = drm_gem_object_lookup(ddev,
-								  p->filp,
-								  r->handle);
-			if (p->relocs[i].gobj == NULL) {
-				DRM_ERROR("gem object lookup failed 0x%x\n",
-					  r->handle);
-				return -ENOENT;
-			}
-			p->relocs_ptr[i] = &p->relocs[i];
-			p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
-			p->relocs[i].lobj.bo = p->relocs[i].robj;
-			p->relocs[i].lobj.wdomain = r->write_domain;
-			p->relocs[i].lobj.rdomain = r->read_domains;
-			p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
-			p->relocs[i].handle = r->handle;
-			p->relocs[i].flags = r->flags;
-			radeon_bo_list_add_object(&p->relocs[i].lobj,
-						  &p->validated);
-
-		} else
+		if (duplicate) {
 			p->relocs[i].handle = 0;
+			continue;
+		}
+
+		p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
+							  r->handle);
+		if (p->relocs[i].gobj == NULL) {
+			DRM_ERROR("gem object lookup failed 0x%x\n",
+				  r->handle);
+			return -ENOENT;
+		}
+		p->relocs_ptr[i] = &p->relocs[i];
+		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
+		p->relocs[i].lobj.bo = p->relocs[i].robj;
+		p->relocs[i].lobj.written = !!r->write_domain;
+
+		/* the first reloc of a UVD job is the
+		   message and it must be in VRAM */
+		if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) {
+			/* TODO: is this still needed for NI+ ? */
+			p->relocs[i].lobj.domain =
+				RADEON_GEM_DOMAIN_VRAM;
+
+			p->relocs[i].lobj.alt_domain =
+				RADEON_GEM_DOMAIN_VRAM;
+
+		} else {
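+			/* prefer the requested domain, but let VRAM buffers
+			   fall back to GTT if validation runs out of space */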
+			uint32_t domain = r->write_domain ?
+				r->write_domain : r->read_domains;
+
+			p->relocs[i].lobj.domain = domain;
+			if (domain == RADEON_GEM_DOMAIN_VRAM)
+				domain |= RADEON_GEM_DOMAIN_GTT;
+			p->relocs[i].lobj.alt_domain = domain;
+		}
+
+		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
+		p->relocs[i].handle = r->handle;
+
+		radeon_bo_list_add_object(&p->relocs[i].lobj,
+					  &p->validated);
 	}
-	return radeon_bo_list_validate(&p->validated);
+	return radeon_bo_list_validate(&p->validated, p->ring);
 }
 
 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@@ -121,6 +141,9 @@
 			return -EINVAL;
 		}
 		break;
+	case RADEON_CS_RING_UVD:
+		p->ring = R600_RING_TYPE_UVD_INDEX;
+		break;
 	}
 	return 0;
 }
@@ -241,15 +264,15 @@
 			return -EINVAL;
 		}
 
-		/* we only support VM on SI+ */
-		if ((p->rdev->family >= CHIP_TAHITI) &&
-		    ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
-			DRM_ERROR("VM required on SI+!\n");
-			return -EINVAL;
-		}
-
 		if (radeon_cs_get_ring(p, ring, priority))
 			return -EINVAL;
+
+		/* we only support VM on some SI+ rings */
+		if ((p->rdev->asic->ring[p->ring].cs_parse == NULL) &&
+		   ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
+			DRM_ERROR("Ring %d requires VM!\n", p->ring);
+			return -EINVAL;
+		}
 	}
 
 	/* deal with non-vm */
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 44b8034..62d0ba3 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -359,7 +359,7 @@
 	uint64_t limit = (uint64_t)radeon_vram_limit << 20;
 
 	mc->vram_start = base;
-	if (mc->mc_vram_size > (0xFFFFFFFF - base + 1)) {
+	if (mc->mc_vram_size > (rdev->mc.mc_mask - base + 1)) {
 		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
 		mc->real_vram_size = mc->aper_size;
 		mc->mc_vram_size = mc->aper_size;
@@ -394,7 +394,7 @@
 {
 	u64 size_af, size_bf;
 
-	size_af = ((0xFFFFFFFF - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
+	size_af = ((rdev->mc.mc_mask - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
 	size_bf = mc->vram_start & ~mc->gtt_base_align;
 	if (size_bf > size_af) {
 		if (mc->gtt_size > size_bf) {
@@ -1068,6 +1068,17 @@
 		radeon_agp_disable(rdev);
 	}
 
+	/* Set the internal MC address mask
+	 * This is the max address of the GPU's
+	 * internal address space.
+	 */
+	if (rdev->family >= CHIP_CAYMAN)
+		rdev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
+	else if (rdev->family >= CHIP_CEDAR)
+		rdev->mc.mc_mask = 0xfffffffffULL; /* 36 bit MC */
+	else
+		rdev->mc.mc_mask = 0xffffffffULL; /* 32 bit MC */
+
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
 	 * IGP - can handle 40-bits
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 66a7f0f..d33f484 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -71,9 +71,12 @@
  *   2.28.0 - r600-eg: Add MEM_WRITE packet support
  *   2.29.0 - R500 FP16 color clear registers
  *   2.30.0 - fix for FMASK texturing
+ *   2.31.0 - Add fastfb support for rs690
+ *   2.32.0 - new info request for rings working
+ *   2.33.0 - Add SI tiling mode array query
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	30
+#define KMS_DRIVER_MINOR	33
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
@@ -160,6 +163,7 @@
 int radeon_pcie_gen2 = -1;
 int radeon_msi = -1;
 int radeon_lockup_timeout = 10000;
+int radeon_fastfb = 0;
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
 module_param_named(no_wb, radeon_no_wb, int, 0444);
@@ -212,6 +216,9 @@
 MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default 10000 = 10 seconds, 0 = disable)");
 module_param_named(lockup_timeout, radeon_lockup_timeout, int, 0444);
 
+MODULE_PARM_DESC(fastfb, "Direct FB access for IGP chips (0 = disable, 1 = enable)");
+module_param_named(fastfb, radeon_fastfb, int, 0444);
+
 static struct pci_device_id pciidlist[] = {
 	radeon_PCI_IDS
 };
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 3435625..82fe183 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -31,9 +31,9 @@
 #include <linux/seq_file.h>
 #include <linux/atomic.h>
 #include <linux/wait.h>
-#include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/slab.h>
+#include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "radeon_reg.h"
 #include "radeon.h"
@@ -767,8 +767,21 @@
 
 	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
-		rdev->fence_drv[ring].scratch_reg = 0;
-		index = R600_WB_EVENT_OFFSET + ring * 4;
+		if (ring != R600_RING_TYPE_UVD_INDEX) {
+			rdev->fence_drv[ring].scratch_reg = 0;
+			index = R600_WB_EVENT_OFFSET + ring * 4;
+			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
+			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
+							 index;
+
+		} else {
+			/* put fence directly behind firmware */
+			rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr +
+							 rdev->uvd_fw->size;
+			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr +
+							 rdev->uvd_fw->size;
+		}
+
 	} else {
 		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
 		if (r) {
@@ -778,9 +791,9 @@
 		index = RADEON_WB_SCRATCH_OFFSET +
 			rdev->fence_drv[ring].scratch_reg -
 			rdev->scratch.reg_base;
+		rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
+		rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
 	}
-	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
-	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
 	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
 	rdev->fence_drv[ring].initialized = true;
 	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index c75cb2c..4f2d4f4 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -50,9 +50,13 @@
 
 	if (rdev == NULL)
 		return 0;
+	if (rdev->rmmio == NULL)
+		goto done_free;
 	radeon_acpi_fini(rdev);
 	radeon_modeset_fini(rdev);
 	radeon_device_fini(rdev);
+
+done_free:
 	kfree(rdev);
 	dev->dev_private = NULL;
 	return 0;
@@ -176,80 +180,65 @@
 	struct radeon_device *rdev = dev->dev_private;
 	struct drm_radeon_info *info = data;
 	struct radeon_mode_info *minfo = &rdev->mode_info;
-	uint32_t value, *value_ptr;
-	uint64_t value64, *value_ptr64;
+	uint32_t *value, value_tmp, *value_ptr, value_size;
+	uint64_t value64;
 	struct drm_crtc *crtc;
 	int i, found;
 
-	/* TIMESTAMP is a 64-bit value, needs special handling. */
-	if (info->request == RADEON_INFO_TIMESTAMP) {
-		if (rdev->family >= CHIP_R600) {
-			value_ptr64 = (uint64_t*)((unsigned long)info->value);
-			value64 = radeon_get_gpu_clock_counter(rdev);
-
-			if (DRM_COPY_TO_USER(value_ptr64, &value64, sizeof(value64))) {
-				DRM_ERROR("copy_to_user %s:%u\n", __func__, __LINE__);
-				return -EFAULT;
-			}
-			return 0;
-		} else {
-			DRM_DEBUG_KMS("timestamp is r6xx+ only!\n");
-			return -EINVAL;
-		}
-	}
-
 	value_ptr = (uint32_t *)((unsigned long)info->value);
-	if (DRM_COPY_FROM_USER(&value, value_ptr, sizeof(value))) {
-		DRM_ERROR("copy_from_user %s:%u\n", __func__, __LINE__);
-		return -EFAULT;
-	}
+	value = &value_tmp;
+	value_size = sizeof(uint32_t);
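+	/* most requests return a single 32-bit value; handlers that need
+	 * more repoint 'value' and adjust 'value_size' before the final
+	 * copy_to_user below */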
 
 	switch (info->request) {
 	case RADEON_INFO_DEVICE_ID:
-		value = dev->pci_device;
+		*value = dev->pci_device;
 		break;
 	case RADEON_INFO_NUM_GB_PIPES:
-		value = rdev->num_gb_pipes;
+		*value = rdev->num_gb_pipes;
 		break;
 	case RADEON_INFO_NUM_Z_PIPES:
-		value = rdev->num_z_pipes;
+		*value = rdev->num_z_pipes;
 		break;
 	case RADEON_INFO_ACCEL_WORKING:
 		/* xf86-video-ati 6.13.0 relies on this being false for evergreen */
 		if ((rdev->family >= CHIP_CEDAR) && (rdev->family <= CHIP_HEMLOCK))
-			value = false;
+			*value = false;
 		else
-			value = rdev->accel_working;
+			*value = rdev->accel_working;
 		break;
 	case RADEON_INFO_CRTC_FROM_ID:
+		if (DRM_COPY_FROM_USER(value, value_ptr, sizeof(uint32_t))) {
+			DRM_ERROR("copy_from_user %s:%u\n", __func__, __LINE__);
+			return -EFAULT;
+		}
 		for (i = 0, found = 0; i < rdev->num_crtc; i++) {
 			crtc = (struct drm_crtc *)minfo->crtcs[i];
-			if (crtc && crtc->base.id == value) {
+			if (crtc && crtc->base.id == *value) {
 				struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-				value = radeon_crtc->crtc_id;
+				*value = radeon_crtc->crtc_id;
 				found = 1;
 				break;
 			}
 		}
 		if (!found) {
-			DRM_DEBUG_KMS("unknown crtc id %d\n", value);
+			DRM_DEBUG_KMS("unknown crtc id %d\n", *value);
 			return -EINVAL;
 		}
 		break;
 	case RADEON_INFO_ACCEL_WORKING2:
-		value = rdev->accel_working;
+		*value = rdev->accel_working;
 		break;
 	case RADEON_INFO_TILING_CONFIG:
 		if (rdev->family >= CHIP_TAHITI)
-			value = rdev->config.si.tile_config;
+			*value = rdev->config.si.tile_config;
 		else if (rdev->family >= CHIP_CAYMAN)
-			value = rdev->config.cayman.tile_config;
+			*value = rdev->config.cayman.tile_config;
 		else if (rdev->family >= CHIP_CEDAR)
-			value = rdev->config.evergreen.tile_config;
+			*value = rdev->config.evergreen.tile_config;
 		else if (rdev->family >= CHIP_RV770)
-			value = rdev->config.rv770.tile_config;
+			*value = rdev->config.rv770.tile_config;
 		else if (rdev->family >= CHIP_R600)
-			value = rdev->config.r600.tile_config;
+			*value = rdev->config.r600.tile_config;
 		else {
 			DRM_DEBUG_KMS("tiling config is r6xx+ only!\n");
 			return -EINVAL;
@@ -262,73 +251,81 @@
 		 *
 		 * When returning, the value is 1 if filp owns hyper-z access,
 		 * 0 otherwise. */
-		if (value >= 2) {
-			DRM_DEBUG_KMS("WANT_HYPERZ: invalid value %d\n", value);
+		if (DRM_COPY_FROM_USER(value, value_ptr, sizeof(uint32_t))) {
+			DRM_ERROR("copy_from_user %s:%u\n", __func__, __LINE__);
+			return -EFAULT;
+		}
+		if (*value >= 2) {
+			DRM_DEBUG_KMS("WANT_HYPERZ: invalid value %d\n", *value);
 			return -EINVAL;
 		}
-		radeon_set_filp_rights(dev, &rdev->hyperz_filp, filp, &value);
+		radeon_set_filp_rights(dev, &rdev->hyperz_filp, filp, value);
 		break;
 	case RADEON_INFO_WANT_CMASK:
 		/* The same logic as Hyper-Z. */
-		if (value >= 2) {
-			DRM_DEBUG_KMS("WANT_CMASK: invalid value %d\n", value);
+		if (DRM_COPY_FROM_USER(value, value_ptr, sizeof(uint32_t))) {
+			DRM_ERROR("copy_from_user %s:%u\n", __func__, __LINE__);
+			return -EFAULT;
+		}
+		if (*value >= 2) {
+			DRM_DEBUG_KMS("WANT_CMASK: invalid value %d\n", *value);
 			return -EINVAL;
 		}
-		radeon_set_filp_rights(dev, &rdev->cmask_filp, filp, &value);
+		radeon_set_filp_rights(dev, &rdev->cmask_filp, filp, value);
 		break;
 	case RADEON_INFO_CLOCK_CRYSTAL_FREQ:
 		/* return clock value in KHz */
 		if (rdev->asic->get_xclk)
-			value = radeon_get_xclk(rdev) * 10;
+			*value = radeon_get_xclk(rdev) * 10;
 		else
-			value = rdev->clock.spll.reference_freq * 10;
+			*value = rdev->clock.spll.reference_freq * 10;
 		break;
 	case RADEON_INFO_NUM_BACKENDS:
 		if (rdev->family >= CHIP_TAHITI)
-			value = rdev->config.si.max_backends_per_se *
+			*value = rdev->config.si.max_backends_per_se *
 				rdev->config.si.max_shader_engines;
 		else if (rdev->family >= CHIP_CAYMAN)
-			value = rdev->config.cayman.max_backends_per_se *
+			*value = rdev->config.cayman.max_backends_per_se *
 				rdev->config.cayman.max_shader_engines;
 		else if (rdev->family >= CHIP_CEDAR)
-			value = rdev->config.evergreen.max_backends;
+			*value = rdev->config.evergreen.max_backends;
 		else if (rdev->family >= CHIP_RV770)
-			value = rdev->config.rv770.max_backends;
+			*value = rdev->config.rv770.max_backends;
 		else if (rdev->family >= CHIP_R600)
-			value = rdev->config.r600.max_backends;
+			*value = rdev->config.r600.max_backends;
 		else {
 			return -EINVAL;
 		}
 		break;
 	case RADEON_INFO_NUM_TILE_PIPES:
 		if (rdev->family >= CHIP_TAHITI)
-			value = rdev->config.si.max_tile_pipes;
+			*value = rdev->config.si.max_tile_pipes;
 		else if (rdev->family >= CHIP_CAYMAN)
-			value = rdev->config.cayman.max_tile_pipes;
+			*value = rdev->config.cayman.max_tile_pipes;
 		else if (rdev->family >= CHIP_CEDAR)
-			value = rdev->config.evergreen.max_tile_pipes;
+			*value = rdev->config.evergreen.max_tile_pipes;
 		else if (rdev->family >= CHIP_RV770)
-			value = rdev->config.rv770.max_tile_pipes;
+			*value = rdev->config.rv770.max_tile_pipes;
 		else if (rdev->family >= CHIP_R600)
-			value = rdev->config.r600.max_tile_pipes;
+			*value = rdev->config.r600.max_tile_pipes;
 		else {
 			return -EINVAL;
 		}
 		break;
 	case RADEON_INFO_FUSION_GART_WORKING:
-		value = 1;
+		*value = 1;
 		break;
 	case RADEON_INFO_BACKEND_MAP:
 		if (rdev->family >= CHIP_TAHITI)
-			value = rdev->config.si.backend_map;
+			*value = rdev->config.si.backend_map;
 		else if (rdev->family >= CHIP_CAYMAN)
-			value = rdev->config.cayman.backend_map;
+			*value = rdev->config.cayman.backend_map;
 		else if (rdev->family >= CHIP_CEDAR)
-			value = rdev->config.evergreen.backend_map;
+			*value = rdev->config.evergreen.backend_map;
 		else if (rdev->family >= CHIP_RV770)
-			value = rdev->config.rv770.backend_map;
+			*value = rdev->config.rv770.backend_map;
 		else if (rdev->family >= CHIP_R600)
-			value = rdev->config.r600.backend_map;
+			*value = rdev->config.r600.backend_map;
 		else {
 			return -EINVAL;
 		}
@@ -337,50 +334,91 @@
 		/* this is where we report if vm is supported or not */
 		if (rdev->family < CHIP_CAYMAN)
 			return -EINVAL;
-		value = RADEON_VA_RESERVED_SIZE;
+		*value = RADEON_VA_RESERVED_SIZE;
 		break;
 	case RADEON_INFO_IB_VM_MAX_SIZE:
 		/* this is where we report if vm is supported or not */
 		if (rdev->family < CHIP_CAYMAN)
 			return -EINVAL;
-		value = RADEON_IB_VM_MAX_SIZE;
+		*value = RADEON_IB_VM_MAX_SIZE;
 		break;
 	case RADEON_INFO_MAX_PIPES:
 		if (rdev->family >= CHIP_TAHITI)
-			value = rdev->config.si.max_cu_per_sh;
+			*value = rdev->config.si.max_cu_per_sh;
 		else if (rdev->family >= CHIP_CAYMAN)
-			value = rdev->config.cayman.max_pipes_per_simd;
+			*value = rdev->config.cayman.max_pipes_per_simd;
 		else if (rdev->family >= CHIP_CEDAR)
-			value = rdev->config.evergreen.max_pipes;
+			*value = rdev->config.evergreen.max_pipes;
 		else if (rdev->family >= CHIP_RV770)
-			value = rdev->config.rv770.max_pipes;
+			*value = rdev->config.rv770.max_pipes;
 		else if (rdev->family >= CHIP_R600)
-			value = rdev->config.r600.max_pipes;
+			*value = rdev->config.r600.max_pipes;
 		else {
 			return -EINVAL;
 		}
 		break;
+	case RADEON_INFO_TIMESTAMP:
+		if (rdev->family < CHIP_R600) {
+			DRM_DEBUG_KMS("timestamp is r6xx+ only!\n");
+			return -EINVAL;
+		}
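+		/* TIMESTAMP is 64 bit, reuse value64 and widen the copy */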
+		value = (uint32_t*)&value64;
+		value_size = sizeof(uint64_t);
+		value64 = radeon_get_gpu_clock_counter(rdev);
+		break;
 	case RADEON_INFO_MAX_SE:
 		if (rdev->family >= CHIP_TAHITI)
-			value = rdev->config.si.max_shader_engines;
+			*value = rdev->config.si.max_shader_engines;
 		else if (rdev->family >= CHIP_CAYMAN)
-			value = rdev->config.cayman.max_shader_engines;
+			*value = rdev->config.cayman.max_shader_engines;
 		else if (rdev->family >= CHIP_CEDAR)
-			value = rdev->config.evergreen.num_ses;
+			*value = rdev->config.evergreen.num_ses;
 		else
-			value = 1;
+			*value = 1;
 		break;
 	case RADEON_INFO_MAX_SH_PER_SE:
 		if (rdev->family >= CHIP_TAHITI)
-			value = rdev->config.si.max_sh_per_se;
+			*value = rdev->config.si.max_sh_per_se;
 		else
 			return -EINVAL;
 		break;
+	case RADEON_INFO_FASTFB_WORKING:
+		*value = rdev->fastfb_working;
+		break;
+	case RADEON_INFO_RING_WORKING:
+		if (DRM_COPY_FROM_USER(value, value_ptr, sizeof(uint32_t))) {
+			DRM_ERROR("copy_from_user %s:%u\n", __func__, __LINE__);
+			return -EFAULT;
+		}
+		switch (*value) {
+		case RADEON_CS_RING_GFX:
+		case RADEON_CS_RING_COMPUTE:
+			*value = rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready;
+			break;
+		case RADEON_CS_RING_DMA:
+			*value = rdev->ring[R600_RING_TYPE_DMA_INDEX].ready;
+			*value |= rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready;
+			break;
+		case RADEON_CS_RING_UVD:
+			*value = rdev->ring[R600_RING_TYPE_UVD_INDEX].ready;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case RADEON_INFO_SI_TILE_MODE_ARRAY:
+		if (rdev->family < CHIP_TAHITI) {
+			DRM_DEBUG_KMS("tile mode array is si only!\n");
+			return -EINVAL;
+		}
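+		/* return the 32 dword tile mode array cached by
+		 * si_tiling_mode_table_init() */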
+		value = rdev->config.si.tile_mode_array;
+		value_size = sizeof(uint32_t)*32;
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
 	}
-	if (DRM_COPY_TO_USER(value_ptr, &value, sizeof(uint32_t))) {
+	if (DRM_COPY_TO_USER(value_ptr, (char*)value, value_size)) {
 		DRM_ERROR("copy_to_user %s:%u\n", __func__, __LINE__);
 		return -EFAULT;
 	}
@@ -513,6 +551,7 @@
 		rdev->hyperz_filp = NULL;
 	if (rdev->cmask_filp == file_priv)
 		rdev->cmask_filp = NULL;
+	radeon_uvd_free_handles(rdev, file_priv);
 }
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 4003f5a..44e579e 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -492,6 +492,29 @@
 #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
 				((em) == ATOM_ENCODER_MODE_DP_MST))
 
+struct atom_clock_dividers {
+	u32 post_div;
+	union {
+		struct {
+#ifdef __BIG_ENDIAN
+			u32 reserved : 6;
+			u32 whole_fb_div : 12;
+			u32 frac_fb_div : 14;
+#else
+			u32 frac_fb_div : 14;
+			u32 whole_fb_div : 12;
+			u32 reserved : 6;
+#endif
+		};
+		u32 fb_div;
+	};
+	u32 ref_div;
+	bool enable_post_div;
+	bool enable_dithen;
+	u32 vco_mode;
+	u32 real_clock;
+};
+
 extern enum radeon_tv_std
 radeon_combios_get_tv_info(struct radeon_device *rdev);
 extern enum radeon_tv_std
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index d3aface..1424ccd 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -321,8 +321,10 @@
 int radeon_bo_init(struct radeon_device *rdev)
 {
 	/* Add an MTRR for the VRAM */
-	rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
+	if (!rdev->fastfb_working) {
+		rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
 			MTRR_TYPE_WRCOMB, 1);
+	}
 	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
 		rdev->mc.mc_vram_size >> 20,
 		(unsigned long long)rdev->mc.aper_size >> 20);
@@ -339,14 +341,14 @@
 void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				struct list_head *head)
 {
-	if (lobj->wdomain) {
+	if (lobj->written) {
 		list_add(&lobj->tv.head, head);
 	} else {
 		list_add_tail(&lobj->tv.head, head);
 	}
 }
 
-int radeon_bo_list_validate(struct list_head *head)
+int radeon_bo_list_validate(struct list_head *head, int ring)
 {
 	struct radeon_bo_list *lobj;
 	struct radeon_bo *bo;
@@ -360,15 +362,17 @@
 	list_for_each_entry(lobj, head, tv.head) {
 		bo = lobj->bo;
 		if (!bo->pin_count) {
-			domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
+			domain = lobj->domain;
 			
 		retry:
 			radeon_ttm_placement_from_domain(bo, domain);
+			if (ring == R600_RING_TYPE_UVD_INDEX)
+				radeon_uvd_force_into_uvd_segment(bo);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement,
 						true, false);
 			if (unlikely(r)) {
-				if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) {
-					domain |= RADEON_GEM_DOMAIN_GTT;
+				if (r != -ERESTARTSYS && domain != lobj->alt_domain) {
+					domain = lobj->alt_domain;
 					goto retry;
 				}
 				return r;
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 5fc86b0..e2cb80a 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -128,7 +128,7 @@
 extern void radeon_bo_fini(struct radeon_device *rdev);
 extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				struct list_head *head);
-extern int radeon_bo_list_validate(struct list_head *head);
+extern int radeon_bo_list_validate(struct list_head *head, int ring);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 				struct vm_area_struct *vma);
 extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 338fd6a..788c64c 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -843,7 +843,11 @@
 	struct radeon_device *rdev = dev->dev_private;
 
 	seq_printf(m, "default engine clock: %u0 kHz\n", rdev->pm.default_sclk);
-	seq_printf(m, "current engine clock: %u0 kHz\n", radeon_get_engine_clock(rdev));
+	/* radeon_get_engine_clock is not reliable on APUs so just print the current clock */
+	if ((rdev->family >= CHIP_PALM) && (rdev->flags & RADEON_IS_IGP))
+		seq_printf(m, "current engine clock: %u0 kHz\n", rdev->pm.current_sclk);
+	else
+		seq_printf(m, "current engine clock: %u0 kHz\n", radeon_get_engine_clock(rdev));
 	seq_printf(m, "default memory clock: %u0 kHz\n", rdev->pm.default_mclk);
 	if (rdev->asic->pm.get_memory_clock)
 		seq_printf(m, "current memory clock: %u0 kHz\n", radeon_get_memory_clock(rdev));
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 8d58e26..e17faa7 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -180,7 +180,8 @@
 		radeon_semaphore_free(rdev, &ib->semaphore, NULL);
 	}
 	/* if we can't remember our last VM flush then flush now! */
-	if (ib->vm && !ib->vm->last_flush) {
+	/* XXX figure out why we have to flush for every IB */
+	if (ib->vm /*&& !ib->vm->last_flush*/) {
 		radeon_ring_vm_flush(rdev, ib->ring, ib->vm);
 	}
 	if (const_ib) {
@@ -368,7 +369,7 @@
 {
 	u32 rptr;
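+	/* the UVD ring does not use writeback for its read pointer */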
 
-	if (rdev->wb.enabled)
+	if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX])
 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
 	else
 		rptr = RREG32(ring->rptr_reg);
@@ -821,18 +822,20 @@
 	return 0;
 }
 
-static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
-static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
-static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
-static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX;
-static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
+static int radeon_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
+static int cayman_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
+static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
+static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX;
+static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
+static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX;
 
 static struct drm_info_list radeon_debugfs_ring_info_list[] = {
-	{"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index},
-	{"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index},
-	{"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index},
-	{"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index},
-	{"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index},
+	{"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index},
+	{"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index},
+	{"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_cp2_index},
+	{"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index},
+	{"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index},
+	{"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index},
 };
 
 static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index fda09c9..bbed4af 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -252,6 +252,36 @@
 		radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
 }
 
+static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
+					     struct radeon_ring *ring,
+					     struct radeon_fence **fence)
+{
+	int r;
+
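+	/* UVD fences must be wrapped in dummy create/destroy msgs,
+	 * see the comment in radeon_uvd.c */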
+	if (ring->idx == R600_RING_TYPE_UVD_INDEX) {
+		r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+		if (r) {
+			DRM_ERROR("Failed to get dummy create msg\n");
+			return r;
+		}
+
+		r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence);
+		if (r) {
+			DRM_ERROR("Failed to get dummy destroy msg\n");
+			return r;
+		}
+	} else {
+		r = radeon_ring_lock(rdev, ring, 64);
+		if (r) {
+			DRM_ERROR("Failed to lock ring %d\n", ring->idx);
+			return r;
+		}
+		radeon_fence_emit(rdev, fence, ring->idx);
+		radeon_ring_unlock_commit(rdev, ring);
+	}
+	return 0;
+}
+
 void radeon_test_ring_sync(struct radeon_device *rdev,
 			   struct radeon_ring *ringA,
 			   struct radeon_ring *ringB)
@@ -272,21 +302,24 @@
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-	r = radeon_fence_emit(rdev, &fence1, ringA->idx);
+	radeon_ring_unlock_commit(rdev, ringA);
+
+	r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1);
+	if (r)
+		goto out_cleanup;
+
+	r = radeon_ring_lock(rdev, ringA, 64);
 	if (r) {
-		DRM_ERROR("Failed to emit fence 1\n");
-		radeon_ring_unlock_undo(rdev, ringA);
+		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-	r = radeon_fence_emit(rdev, &fence2, ringA->idx);
-	if (r) {
-		DRM_ERROR("Failed to emit fence 2\n");
-		radeon_ring_unlock_undo(rdev, ringA);
-		goto out_cleanup;
-	}
 	radeon_ring_unlock_commit(rdev, ringA);
 
+	r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2);
+	if (r)
+		goto out_cleanup;
+
 	mdelay(1000);
 
 	if (radeon_fence_signaled(fence1)) {
@@ -364,27 +397,22 @@
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-	r = radeon_fence_emit(rdev, &fenceA, ringA->idx);
-	if (r) {
-		DRM_ERROR("Failed to emit sync fence 1\n");
-		radeon_ring_unlock_undo(rdev, ringA);
-		goto out_cleanup;
-	}
 	radeon_ring_unlock_commit(rdev, ringA);
 
+	r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA);
+	if (r)
+		goto out_cleanup;
+
 	r = radeon_ring_lock(rdev, ringB, 64);
 	if (r) {
 		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore);
-	r = radeon_fence_emit(rdev, &fenceB, ringB->idx);
-	if (r) {
-		DRM_ERROR("Failed to create sync fence 2\n");
-		radeon_ring_unlock_undo(rdev, ringB);
-		goto out_cleanup;
-	}
 	radeon_ring_unlock_commit(rdev, ringB);
+	r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB);
+	if (r)
+		goto out_cleanup;
 
 	mdelay(1000);
 
@@ -393,7 +421,7 @@
 		goto out_cleanup;
 	}
 	if (radeon_fence_signaled(fenceB)) {
-		DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
+		DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
 		goto out_cleanup;
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
new file mode 100644
index 0000000..30a9460
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -0,0 +1,666 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <deathsimple@vodafone.de>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "radeon.h"
+#include "r600d.h"
+
+/* Firmware Names */
+#define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
+#define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
+#define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
+#define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
+
+MODULE_FIRMWARE(FIRMWARE_RV710);
+MODULE_FIRMWARE(FIRMWARE_CYPRESS);
+MODULE_FIRMWARE(FIRMWARE_SUMO);
+MODULE_FIRMWARE(FIRMWARE_TAHITI);
+
+int radeon_uvd_init(struct radeon_device *rdev)
+{
+	struct platform_device *pdev;
+	unsigned long bo_size;
+	const char *fw_name;
+	int i, r;
+
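+	/* request_firmware() needs a struct device, so register a
+	 * throw-away platform device just for the firmware load */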
+	pdev = platform_device_register_simple("radeon_uvd", 0, NULL, 0);
+	r = IS_ERR(pdev);
+	if (r) {
+		dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n");
+		return -EINVAL;
+	}
+
+	switch (rdev->family) {
+	case CHIP_RV710:
+	case CHIP_RV730:
+	case CHIP_RV740:
+		fw_name = FIRMWARE_RV710;
+		break;
+
+	case CHIP_CYPRESS:
+	case CHIP_HEMLOCK:
+	case CHIP_JUNIPER:
+	case CHIP_REDWOOD:
+	case CHIP_CEDAR:
+		fw_name = FIRMWARE_CYPRESS;
+		break;
+
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
+	case CHIP_PALM:
+	case CHIP_CAYMAN:
+	case CHIP_BARTS:
+	case CHIP_TURKS:
+	case CHIP_CAICOS:
+		fw_name = FIRMWARE_SUMO;
+		break;
+
+	case CHIP_TAHITI:
+	case CHIP_VERDE:
+	case CHIP_PITCAIRN:
+	case CHIP_ARUBA:
+		fw_name = FIRMWARE_TAHITI;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev);
+	if (r) {
+		dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
+			fw_name);
+		platform_device_unregister(pdev);
+		return r;
+	}
+
+	platform_device_unregister(pdev);
+
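+	/* the VCPU BO holds the firmware image plus the UVD stack and heap */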
+	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) +
+		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE;
+	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
+		return r;
+	}
+
+	r = radeon_uvd_resume(rdev);
+	if (r)
+		return r;
+
+	memset(rdev->uvd.cpu_addr, 0, bo_size);
+	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
+
+	r = radeon_uvd_suspend(rdev);
+	if (r)
+		return r;
+
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+		atomic_set(&rdev->uvd.handles[i], 0);
+		rdev->uvd.filp[i] = NULL;
+	}
+
+	return 0;
+}
+
+void radeon_uvd_fini(struct radeon_device *rdev)
+{
+	radeon_uvd_suspend(rdev);
+	radeon_bo_unref(&rdev->uvd.vcpu_bo);
+}
+
+int radeon_uvd_suspend(struct radeon_device *rdev)
+{
+	int r;
+
+	if (rdev->uvd.vcpu_bo == NULL)
+		return 0;
+
+	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
+	if (!r) {
+		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
+		radeon_bo_unpin(rdev->uvd.vcpu_bo);
+		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+	}
+	return r;
+}
+
+int radeon_uvd_resume(struct radeon_device *rdev)
+{
+	int r;
+
+	if (rdev->uvd.vcpu_bo == NULL)
+		return -EINVAL;
+
+	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
+	if (r) {
+		radeon_bo_unref(&rdev->uvd.vcpu_bo);
+		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
+		return r;
+	}
+
+	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
+			  &rdev->uvd.gpu_addr);
+	if (r) {
+		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+		radeon_bo_unref(&rdev->uvd.vcpu_bo);
+		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
+		return r;
+	}
+
+	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
+	if (r) {
+		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
+		return r;
+	}
+
+	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+
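+	/* bring the UVD vclk/dclk back up so the block is usable again */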
+	radeon_set_uvd_clocks(rdev, 53300, 40000);
+
+	return 0;
+}
+
+void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
+{
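+	/* UVD buffers must stay inside the first 256MB segment of VRAM */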
+	rbo->placement.fpfn = 0 >> PAGE_SHIFT;
+	rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+}
+
+void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
+{
+	int i, r;
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+		if (rdev->uvd.filp[i] == filp) {
+			uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
+			struct radeon_fence *fence;
+
+			r = radeon_uvd_get_destroy_msg(rdev,
+				R600_RING_TYPE_UVD_INDEX, handle, &fence);
+			if (r) {
+				DRM_ERROR("Error destroying UVD (%d)!\n", r);
+				continue;
+			}
+
+			radeon_fence_wait(fence, false);
+			radeon_fence_unref(&fence);
+
+			rdev->uvd.filp[i] = NULL;
+			atomic_set(&rdev->uvd.handles[i], 0);
+		}
+	}
+}
+
+static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
+{
+	unsigned stream_type = msg[4];
+	unsigned width = msg[6];
+	unsigned height = msg[7];
+	unsigned dpb_size = msg[9];
+	unsigned pitch = msg[28];
+
+	unsigned width_in_mb = width / 16;
+	unsigned height_in_mb = ALIGN(height / 16, 2);
+
+	unsigned image_size, tmp, min_dpb_size;
+
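+	/* 4:2:0 frame: luma plane plus half as much chroma, 1KB aligned */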
+	image_size = width * height;
+	image_size += image_size / 2;
+	image_size = ALIGN(image_size, 1024);
+
+	switch (stream_type) {
+	case 0: /* H264 */
+
+		/* reference picture buffer */
+		min_dpb_size = image_size * 17;
+
+		/* macroblock context buffer */
+		min_dpb_size += width_in_mb * height_in_mb * 17 * 192;
+
+		/* IT surface buffer */
+		min_dpb_size += width_in_mb * height_in_mb * 32;
+		break;
+
+	case 1: /* VC1 */
+
+		/* reference picture buffer */
+		min_dpb_size = image_size * 3;
+
+		/* CONTEXT_BUFFER */
+		min_dpb_size += width_in_mb * height_in_mb * 128;
+
+		/* IT surface buffer */
+		min_dpb_size += width_in_mb * 64;
+
+		/* DB surface buffer */
+		min_dpb_size += width_in_mb * 128;
+
+		/* BP */
+		tmp = max(width_in_mb, height_in_mb);
+		min_dpb_size += ALIGN(tmp * 7 * 16, 64);
+		break;
+
+	case 3: /* MPEG2 */
+
+		/* reference picture buffer */
+		min_dpb_size = image_size * 3;
+		break;
+
+	case 4: /* MPEG4 */
+
+		/* reference picture buffer */
+		min_dpb_size = image_size * 3;
+
+		/* CM */
+		min_dpb_size += width_in_mb * height_in_mb * 64;
+
+		/* IT surface buffer */
+		min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
+		break;
+
+	default:
+		DRM_ERROR("UVD codec not handled %d!\n", stream_type);
+		return -EINVAL;
+	}
+
+	if (width > pitch) {
+		DRM_ERROR("Invalid UVD decoding target pitch!\n");
+		return -EINVAL;
+	}
+
+	if (dpb_size < min_dpb_size) {
+		DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
+			  dpb_size, min_dpb_size);
+		return -EINVAL;
+	}
+
+	buf_sizes[0x1] = dpb_size;
+	buf_sizes[0x2] = image_size;
+	return 0;
+}
+
+static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
+			     unsigned offset, unsigned buf_sizes[])
+{
+	int32_t *msg, msg_type, handle;
+	void *ptr;
+
+	int i, r;
+
+	if (offset & 0x3F) {
+		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
+		return -EINVAL;
+	}
+
+	r = radeon_bo_kmap(bo, &ptr);
+	if (r)
+		return r;
+
+	msg = ptr + offset;
+
+	msg_type = msg[1];
+	handle = msg[2];
+
+	if (handle == 0) {
+		DRM_ERROR("Invalid UVD handle!\n");
+		radeon_bo_kunmap(bo);
+		return -EINVAL;
+	}
+
+	if (msg_type == 1) {
+		/* it's a decode msg, calc buffer sizes */
+		r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
+		radeon_bo_kunmap(bo);
+		if (r)
+			return r;
+
+	} else if (msg_type == 2) {
+		/* it's a destroy msg, free the handle */
+		for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
+			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
+		radeon_bo_kunmap(bo);
+		return 0;
+	} else {
+		/* it's a create msg, no special handling needed */
+		radeon_bo_kunmap(bo);
+	}
+
+	/* create or decode, validate the handle */
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+		if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
+			return 0;
+	}
+
+	/* handle not found, try to alloc a new one */
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+		if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
+			p->rdev->uvd.filp[i] = p->filp;
+			return 0;
+		}
+	}
+
+	DRM_ERROR("No more free UVD handles!\n");
+	return -EINVAL;
+}
+
+static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
+			       int data0, int data1,
+			       unsigned buf_sizes[])
+{
+	struct radeon_cs_chunk *relocs_chunk;
+	struct radeon_cs_reloc *reloc;
+	unsigned idx, cmd, offset;
+	uint64_t start, end;
+	int r;
+
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	offset = radeon_get_ib_value(p, data0);
+	idx = radeon_get_ib_value(p, data1);
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d!\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+
+	reloc = p->relocs_ptr[(idx / 4)];
+	start = reloc->lobj.gpu_offset;
+	end = start + radeon_bo_size(reloc->robj);
+	start += offset;
+
+	p->ib.ptr[data0] = start & 0xFFFFFFFF;
+	p->ib.ptr[data1] = start >> 32;
+
+	cmd = radeon_get_ib_value(p, p->idx) >> 1;
+
+	if (cmd < 0x4) {
+		if ((end - start) < buf_sizes[cmd]) {
+			DRM_ERROR("buffer too small (%d / %d)!\n",
+				  (unsigned)(end - start), buf_sizes[cmd]);
+			return -EINVAL;
+		}
+
+	} else if (cmd != 0x100) {
+		DRM_ERROR("invalid UVD command %X!\n", cmd);
+		return -EINVAL;
+	}
+
+	if (cmd == 0) {
+		if (end & 0xFFFFFFFFF0000000) {
+			DRM_ERROR("msg buffer %LX-%LX out of 256MB segment!\n",
+				  start, end);
+			return -EINVAL;
+		}
+
+		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
+		if (r)
+			return r;
+	}
+
+	if ((start & 0xFFFFFFFFF0000000) != (end & 0xFFFFFFFFF0000000)) {
+		DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
+			  start, end);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int *data0, int *data1,
+			     unsigned buf_sizes[])
+{
+	int i, r;
+
+	p->idx++;
+	for (i = 0; i <= pkt->count; ++i) {
+		switch (pkt->reg + i*4) {
+		case UVD_GPCOM_VCPU_DATA0:
+			*data0 = p->idx;
+			break;
+		case UVD_GPCOM_VCPU_DATA1:
+			*data1 = p->idx;
+			break;
+		case UVD_GPCOM_VCPU_CMD:
+			r = radeon_uvd_cs_reloc(p, *data0, *data1, buf_sizes);
+			if (r)
+				return r;
+			break;
+		case UVD_ENGINE_CNTL:
+			break;
+		default:
+			DRM_ERROR("Invalid reg 0x%X!\n",
+				  pkt->reg + i*4);
+			return -EINVAL;
+		}
+		p->idx++;
+	}
+	return 0;
+}
+
+int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_packet pkt;
+	int r, data0 = 0, data1 = 0;
+
+	/* minimum buffer sizes */
+	unsigned buf_sizes[] = {
+		[0x00000000]	=	2048,
+		[0x00000001]	=	32 * 1024 * 1024,
+		[0x00000002]	=	2048 * 1152 * 3,
+		[0x00000003]	=	2048,
+	};
+
+	if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
+		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
+			  p->chunks[p->chunk_ib_idx].length_dw);
+		return -EINVAL;
+	}
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk!\n");
+		return -EINVAL;
+	}
+
+	do {
+		r = radeon_cs_packet_parse(p, &pkt, p->idx);
+		if (r)
+			return r;
+		switch (pkt.type) {
+		case RADEON_PACKET_TYPE0:
+			r = radeon_uvd_cs_reg(p, &pkt, &data0,
+					      &data1, buf_sizes);
+			if (r)
+				return r;
+			break;
+		case RADEON_PACKET_TYPE2:
+			p->idx += pkt.count + 2;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d!\n", pkt.type);
+			return -EINVAL;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+	return 0;
+}
+
+static int radeon_uvd_send_msg(struct radeon_device *rdev,
+			       int ring, struct radeon_bo *bo,
+			       struct radeon_fence **fence)
+{
+	struct ttm_validate_buffer tv;
+	struct list_head head;
+	struct radeon_ib ib;
+	uint64_t addr;
+	int i, r;
+
+	memset(&tv, 0, sizeof(tv));
+	tv.bo = &bo->tbo;
+
+	INIT_LIST_HEAD(&head);
+	list_add(&tv.head, &head);
+
+	r = ttm_eu_reserve_buffers(&head);
+	if (r)
+		return r;
+
+	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM);
+	radeon_uvd_force_into_uvd_segment(bo);
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+	if (r) {
+		ttm_eu_backoff_reservation(&head);
+		return r;
+	}
+
+	r = radeon_ib_get(rdev, ring, &ib, NULL, 16);
+	if (r) {
+		ttm_eu_backoff_reservation(&head);
+		return r;
+	}
+
+	addr = radeon_bo_gpu_offset(bo);
+	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
+	ib.ptr[1] = addr;
+	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
+	ib.ptr[3] = addr >> 32;
+	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
+	ib.ptr[5] = 0;
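+	/* pad the remainder of the IB with type-2 NOPs */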
+	for (i = 6; i < 16; ++i)
+		ib.ptr[i] = PACKET2(0);
+	ib.length_dw = 16;
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+		ttm_eu_backoff_reservation(&head);
+		return r;
+	}
+	ttm_eu_fence_buffer_objects(&head, ib.fence);
+
+	if (fence)
+		*fence = radeon_fence_ref(ib.fence);
+
+	radeon_ib_free(rdev, &ib);
+	radeon_bo_unref(&bo);
+	return 0;
+}
+
+/* multiple fence commands without any stream commands in between can
+   crash the vcpu, so just emit a dummy create/destroy msg to
+   avoid this */
+int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
+			      uint32_t handle, struct radeon_fence **fence)
+{
+	struct radeon_bo *bo;
+	uint32_t *msg;
+	int r, i;
+
+	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
+	if (r)
+		return r;
+
+	r = radeon_bo_reserve(bo, false);
+	if (r) {
+		radeon_bo_unref(&bo);
+		return r;
+	}
+
+	r = radeon_bo_kmap(bo, (void **)&msg);
+	if (r) {
+		radeon_bo_unreserve(bo);
+		radeon_bo_unref(&bo);
+		return r;
+	}
+
+	/* stitch together a UVD create msg */
+	msg[0] = 0x00000de4;
+	msg[1] = 0x00000000;
+	msg[2] = handle;
+	msg[3] = 0x00000000;
+	msg[4] = 0x00000000;
+	msg[5] = 0x00000000;
+	msg[6] = 0x00000000;
+	msg[7] = 0x00000780;
+	msg[8] = 0x00000440;
+	msg[9] = 0x00000000;
+	msg[10] = 0x01b37000;
+	for (i = 11; i < 1024; ++i)
+		msg[i] = 0x0;
+
+	radeon_bo_kunmap(bo);
+	radeon_bo_unreserve(bo);
+
+	return radeon_uvd_send_msg(rdev, ring, bo, fence);
+}
+
+int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
+			       uint32_t handle, struct radeon_fence **fence)
+{
+	struct radeon_bo *bo;
+	uint32_t *msg;
+	int r, i;
+
+	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
+	if (r)
+		return r;
+
+	r = radeon_bo_reserve(bo, false);
+	if (r) {
+		radeon_bo_unref(&bo);
+		return r;
+	}
+
+	r = radeon_bo_kmap(bo, (void **)&msg);
+	if (r) {
+		radeon_bo_unreserve(bo);
+		radeon_bo_unref(&bo);
+		return r;
+	}
+
+	/* stitch together a UVD destroy msg */
+	msg[0] = 0x00000de4;
+	msg[1] = 0x00000002;
+	msg[2] = handle;
+	msg[3] = 0x00000000;
+	for (i = 4; i < 1024; ++i)
+		msg[i] = 0x0;
+
+	radeon_bo_kunmap(bo);
+	radeon_bo_unreserve(bo);
+
+	return radeon_uvd_send_msg(rdev, ring, bo, fence);
+}
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 5706d2a..ab4c86c 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -148,6 +148,8 @@
 static void rs690_mc_init(struct radeon_device *rdev)
 {
 	u64 base;
+	uint32_t h_addr, l_addr;
+	unsigned long long k8_addr;
 
 	rs400_gart_adjust_size(rdev);
 	rdev->mc.vram_is_ddr = true;
@@ -160,6 +162,27 @@
 	base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
 	base = G_000100_MC_FB_START(base) << 16;
 	rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
+
+	/* Use K8 direct mapping for fast fb access. */
+	rdev->fastfb_working = false;
+	h_addr = G_00005F_K8_ADDR_EXT(RREG32_MC(R_00005F_MC_MISC_UMA_CNTL));
+	l_addr = RREG32_MC(R_00001E_K8_FB_LOCATION);
+	k8_addr = ((unsigned long long)h_addr) << 32 | l_addr;
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
+	if (k8_addr + rdev->mc.visible_vram_size < 0x100000000ULL)
+#endif
+	{
+		/* FastFB is only usable with UMA memory, so it is simply
+		 * disabled when sideport memory is present.
+		 */
+		if (rdev->mc.igp_sideport_enabled == false && radeon_fastfb == 1) {
+			DRM_INFO("Direct mapping: aper base at 0x%llx, replaced by direct mapping base 0x%llx.\n",
+					(unsigned long long)rdev->mc.aper_base, k8_addr);
+			rdev->mc.aper_base = (resource_size_t)k8_addr;
+			rdev->fastfb_working = true;
+		}
+	}
+
 	rs690_pm_info(rdev);
 	radeon_vram_location(rdev, &rdev->mc, base);
 	rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;
diff --git a/drivers/gpu/drm/radeon/rs690d.h b/drivers/gpu/drm/radeon/rs690d.h
index 36e6398..8af3ccf 100644
--- a/drivers/gpu/drm/radeon/rs690d.h
+++ b/drivers/gpu/drm/radeon/rs690d.h
@@ -29,6 +29,9 @@
 #define __RS690D_H__
 
 /* Registers */
+#define R_00001E_K8_FB_LOCATION                      0x00001E
+#define R_00005F_MC_MISC_UMA_CNTL                    0x00005F
+#define   G_00005F_K8_ADDR_EXT(x)                      (((x) >> 0) & 0xFF)
 #define R_000078_MC_INDEX                            0x000078
 #define   S_000078_MC_IND_ADDR(x)                      (((x) & 0x1FF) << 0)
 #define   G_000078_MC_IND_ADDR(x)                      (((x) >> 0) & 0x1FF)
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 435ed35..5e1ba16 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -348,7 +348,7 @@
 	/* update crtc base addresses */
 	for (i = 0; i < rdev->num_crtc; i++) {
 		if (rdev->family >= CHIP_RV770) {
-			if (i == 1) {
+			if (i == 0) {
 				WREG32(R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH,
 				       upper_32_bits(rdev->mc.vram_start));
 				WREG32(R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH,
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index d63fe1d..777f537 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -42,6 +42,162 @@
 static void rv770_gpu_init(struct radeon_device *rdev);
 void rv770_fini(struct radeon_device *rdev);
 static void rv770_pcie_gen2_enable(struct radeon_device *rdev);
+int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
+
+static int rv770_uvd_calc_post_div(unsigned target_freq,
+				   unsigned vco_freq,
+				   unsigned *div)
+{
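+	/* e.g. vco_freq=80000, target_freq=53300: PDIV rounds up to 2,
+	 * *div stores the hardware encoding 1 and 40000 is returned */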
+	/* Fclk = Fvco / PDIV */
+	*div = vco_freq / target_freq;
+
+	/* we always need a frequency less than or equal to the target */
+	if ((vco_freq / *div) > target_freq)
+		*div += 1;
+
+	/* out of range ? */
+	if (*div > 30)
+		return -1; /* forget it */
+
+	*div -= 1;
+	return vco_freq / (*div + 1);
+}
+
+static int rv770_uvd_send_upll_ctlreq(struct radeon_device *rdev)
+{
+	unsigned i;
+
+	/* assert UPLL_CTLREQ */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+	/* wait for CTLACK and CTLACK2 to get asserted */
+	for (i = 0; i < 100; ++i) {
+		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+		if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
+			break;
+		mdelay(10);
+	}
+	if (i == 100)
+		return -ETIMEDOUT;
+
+	/* deassert UPLL_CTLREQ */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+	return 0;
+}
+
+int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
+{
+	/* start off with something large */
+	int optimal_diff_score = 0x7FFFFFF;
+	unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
+	unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
+	unsigned vco_freq, vco_min = 50000, vco_max = 160000;
+	unsigned ref_freq = rdev->clock.spll.reference_freq;
+	int r;
+
+	/* RV740 uses evergreen uvd clk programming */
+	if (rdev->family == CHIP_RV740)
+		return evergreen_set_uvd_clocks(rdev, vclk, dclk);
+
+	/* loop through vco from low to high */
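+	/* the VCO must run at least as fast as the higher requested clock */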
+	vco_min = max(max(vco_min, vclk), dclk);
+	for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 500) {
+		uint64_t fb_div = (uint64_t)vco_freq * 43663;
+		int calc_clk, diff_score, diff_vclk, diff_dclk;
+		unsigned vclk_div, dclk_div;
+
+		do_div(fb_div, ref_freq);
+		fb_div |= 1;
+
+		/* fb div out of range ? */
+		if (fb_div > 0x03FFFFFF)
+			break; /* it can only get worse */
+
+		/* calc vclk with current vco freq. */
+		calc_clk = rv770_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
+		if (calc_clk == -1)
+			break; /* vco is too big, it has to stop. */
+		diff_vclk = vclk - calc_clk;
+
+		/* calc dclk with current vco freq. */
+		calc_clk = rv770_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
+		if (calc_clk == -1)
+			break; /* vco is too big, it has to stop. */
+		diff_dclk = dclk - calc_clk;
+
+		/* determine if this vco setting is better than current optimal settings */
+		diff_score = abs(diff_vclk) + abs(diff_dclk);
+		if (diff_score < optimal_diff_score) {
+			optimal_fb_div = fb_div;
+			optimal_vclk_div = vclk_div;
+			optimal_dclk_div = dclk_div;
+			optimal_vco_freq = vco_freq;
+			optimal_diff_score = diff_score;
+			if (optimal_diff_score == 0)
+				break; /* it can't get better than this */
+		}
+	}
+
+	/* bypass vclk and dclk with bclk */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
+		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	/* set UPLL_FB_DIV to 0x50000 */
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(0x50000), ~UPLL_FB_DIV_MASK);
+
+	/* deassert UPLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	/* assert BYPASS EN and FB_DIV[0] <- ??? why? */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(1), ~UPLL_FB_DIV(1));
+
+	r = rv770_uvd_send_upll_ctlreq(rdev);
+	if (r)
+		return r;
+
+	/* assert PLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
+
+	/* set the required FB_DIV, REF_DIV and post divider values */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REF_DIV(1), ~UPLL_REF_DIV_MASK);
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 UPLL_SW_HILEN(optimal_vclk_div >> 1) |
+		 UPLL_SW_LOLEN((optimal_vclk_div >> 1) + (optimal_vclk_div & 1)) |
+		 UPLL_SW_HILEN2(optimal_dclk_div >> 1) |
+		 UPLL_SW_LOLEN2((optimal_dclk_div >> 1) + (optimal_dclk_div & 1)),
+		 ~UPLL_SW_MASK);
+
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div),
+		 ~UPLL_FB_DIV_MASK);
+
+	/* give the PLL some time to settle */
+	mdelay(15);
+
+	/* deassert PLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	mdelay(15);
+
+	/* deassert BYPASS EN and FB_DIV[0] <- ??? why? */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, 0, ~UPLL_FB_DIV(1));
+
+	r = rv770_uvd_send_upll_ctlreq(rdev);
+	if (r)
+		return r;
+
+	/* switch VCLK and DCLK selection */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
+		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	mdelay(100);
+
+	return 0;
+}
 
 #define PCIE_BUS_CLK                10000
 #define TCLK                        (PCIE_BUS_CLK / 10)
@@ -68,6 +224,105 @@
 	return reference_clock;
 }
 
+int rv770_uvd_resume(struct radeon_device *rdev)
+{
+	uint64_t addr;
+	uint32_t chip_id, size;
+	int r;
+
+	r = radeon_uvd_resume(rdev);
+	if (r)
+		return r;
+
+	/* program the VCPU memory controller bits 0-27 */
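+	/* offsets and sizes are programmed in 8 byte units, hence the >> 3 */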
+	addr = rdev->uvd.gpu_addr >> 3;
+	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE0, size);
+
+	addr += size;
+	size = RADEON_UVD_STACK_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE1, size);
+
+	addr += size;
+	size = RADEON_UVD_HEAP_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE2, size);
+
+	/* bits 28-31 */
+	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
+	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+	/* bits 32-39 */
+	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
+	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+	/* tell firmware which hardware it is running on */
+	switch (rdev->family) {
+	default:
+		return -EINVAL;
+	case CHIP_RV710:
+		chip_id = 0x01000005;
+		break;
+	case CHIP_RV730:
+		chip_id = 0x01000006;
+		break;
+	case CHIP_RV740:
+		chip_id = 0x01000007;
+		break;
+	case CHIP_CYPRESS:
+	case CHIP_HEMLOCK:
+		chip_id = 0x01000008;
+		break;
+	case CHIP_JUNIPER:
+		chip_id = 0x01000009;
+		break;
+	case CHIP_REDWOOD:
+		chip_id = 0x0100000a;
+		break;
+	case CHIP_CEDAR:
+		chip_id = 0x0100000b;
+		break;
+	case CHIP_SUMO:
+		chip_id = 0x0100000c;
+		break;
+	case CHIP_SUMO2:
+		chip_id = 0x0100000d;
+		break;
+	case CHIP_PALM:
+		chip_id = 0x0100000e;
+		break;
+	case CHIP_CAYMAN:
+		chip_id = 0x0100000f;
+		break;
+	case CHIP_BARTS:
+		chip_id = 0x01000010;
+		break;
+	case CHIP_TURKS:
+		chip_id = 0x01000011;
+		break;
+	case CHIP_CAICOS:
+		chip_id = 0x01000012;
+		break;
+	case CHIP_TAHITI:
+		chip_id = 0x01000014;
+		break;
+	case CHIP_VERDE:
+		chip_id = 0x01000015;
+		break;
+	case CHIP_PITCAIRN:
+		chip_id = 0x01000016;
+		break;
+	case CHIP_ARUBA:
+		chip_id = 0x01000017;
+		break;
+	}
+	WREG32(UVD_VCPU_CHIP_ID, chip_id);
+
+	return 0;
+}
+
 u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
@@ -611,6 +866,11 @@
 	WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
 	WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
 	WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
+	if (rdev->family == CHIP_RV730) {
+		WREG32(UVD_UDEC_DB_TILING_CONFIG, (gb_tiling_config & 0xffff));
+		WREG32(UVD_UDEC_DBW_TILING_CONFIG, (gb_tiling_config & 0xffff));
+		WREG32(UVD_UDEC_TILING_CONFIG, (gb_tiling_config & 0xffff));
+	}
 
 	WREG32(CGTS_SYS_TCC_DISABLE, 0);
 	WREG32(CGTS_TCC_DISABLE, 0);
@@ -840,7 +1100,7 @@
 	}
 	if (rdev->flags & RADEON_IS_AGP) {
 		size_bf = mc->gtt_start;
-		size_af = 0xFFFFFFFF - mc->gtt_end;
+		size_af = mc->mc_mask - mc->gtt_end;
 		if (size_bf > size_af) {
 			if (mc->mc_vram_size > size_bf) {
 				dev_warn(rdev->dev, "limiting VRAM\n");
@@ -1040,6 +1300,17 @@
 		return r;
 	}
 
+	r = rv770_uvd_resume(rdev);
+	if (!r) {
+		r = radeon_fence_driver_start_ring(rdev,
+						   R600_RING_TYPE_UVD_INDEX);
+		if (r)
+			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+	}
+
+	if (r)
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -1074,6 +1345,19 @@
 	if (r)
 		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	if (ring->ring_size) {
+		r = radeon_ring_init(rdev, ring, ring->ring_size,
+				     R600_WB_UVD_RPTR_OFFSET,
+				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+				     0, 0xfffff, RADEON_CP_PACKET2);
+		if (!r)
+			r = r600_uvd_init(rdev);
+
+		if (r)
+			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1115,6 +1399,7 @@
 int rv770_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
+	radeon_uvd_suspend(rdev);
 	r700_cp_stop(rdev);
 	r600_dma_stop(rdev);
 	r600_irq_suspend(rdev);
@@ -1190,6 +1475,13 @@
 	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
 
+	r = radeon_uvd_init(rdev);
+	if (!r) {
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
+		r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
+			       4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1224,6 +1516,7 @@
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	rv770_pcie_gart_fini(rdev);
+	radeon_uvd_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
 	radeon_fence_driver_fini(rdev);
@@ -1264,23 +1557,23 @@
 	DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
 
 	/* advertise upconfig capability */
-	link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
+	link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
 	link_width_cntl &= ~LC_UPCONFIGURE_DIS;
-	WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
-	link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
+	WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+	link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
 	if (link_width_cntl & LC_RENEGOTIATION_SUPPORT) {
 		lanes = (link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT;
 		link_width_cntl &= ~(LC_LINK_WIDTH_MASK |
 				     LC_RECONFIG_ARC_MISSING_ESCAPE);
 		link_width_cntl |= lanes | LC_RECONFIG_NOW |
 			LC_RENEGOTIATE_EN | LC_UPCONFIGURE_SUPPORT;
-		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
 	} else {
 		link_width_cntl |= LC_UPCONFIGURE_DIS;
-		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
 	}
 
-	speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 	if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
 	    (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) {
 
@@ -1293,29 +1586,29 @@
 		WREG16(0x4088, link_cntl2);
 		WREG32(MM_CFGREGS_CNTL, 0);
 
-		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 		speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
-		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 		speed_cntl |= LC_CLR_FAILED_SPD_CHANGE_CNT;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
-		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 		speed_cntl &= ~LC_CLR_FAILED_SPD_CHANGE_CNT;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
-		speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL);
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 		speed_cntl |= LC_GEN2_EN_STRAP;
-		WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
 
 	} else {
-		link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
+		link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
 		/* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */
 		if (1)
 			link_width_cntl |= LC_UPCONFIGURE_DIS;
 		else
 			link_width_cntl &= ~LC_UPCONFIGURE_DIS;
-		WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+		WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
 	}
 }
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index c55f950..6a52b20 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -38,6 +38,30 @@
 #define R7XX_MAX_PIPES             8
 #define R7XX_MAX_PIPES_MASK        0xff
 
+/* discrete uvd clocks */
+#define CG_UPLL_FUNC_CNTL				0x718
+#	define UPLL_RESET_MASK				0x00000001
+#	define UPLL_SLEEP_MASK				0x00000002
+#	define UPLL_BYPASS_EN_MASK			0x00000004
+#	define UPLL_CTLREQ_MASK				0x00000008
+#	define UPLL_REF_DIV(x)				((x) << 16)
+#	define UPLL_REF_DIV_MASK			0x001F0000
+#	define UPLL_CTLACK_MASK				0x40000000
+#	define UPLL_CTLACK2_MASK			0x80000000
+#define CG_UPLL_FUNC_CNTL_2				0x71c
+#	define UPLL_SW_HILEN(x)				((x) << 0)
+#	define UPLL_SW_LOLEN(x)				((x) << 4)
+#	define UPLL_SW_HILEN2(x)			((x) << 8)
+#	define UPLL_SW_LOLEN2(x)			((x) << 12)
+#	define UPLL_SW_MASK				0x0000FFFF
+#	define VCLK_SRC_SEL(x)				((x) << 20)
+#	define VCLK_SRC_SEL_MASK			0x01F00000
+#	define DCLK_SRC_SEL(x)				((x) << 25)
+#	define DCLK_SRC_SEL_MASK			0x3E000000
+#define CG_UPLL_FUNC_CNTL_3				0x720
+#	define UPLL_FB_DIV(x)				((x) << 0)
+#	define UPLL_FB_DIV_MASK				0x01FFFFFF
+
 /* Registers */
 #define	CB_COLOR0_BASE					0x28040
 #define	CB_COLOR1_BASE					0x28044
@@ -112,6 +136,11 @@
 #define DMA_TILING_CONFIG                               0x3ec8
 #define DMA_TILING_CONFIG2                              0xd0b8
 
+/* RV730 only */
+#define UVD_UDEC_TILING_CONFIG                          0xef40
+#define UVD_UDEC_DB_TILING_CONFIG                       0xef44
+#define UVD_UDEC_DBW_TILING_CONFIG                      0xef48
+
 #define	GC_USER_SHADER_PIPE_CONFIG			0x8954
 #define		INACTIVE_QD_PIPES(x)				((x) << 8)
 #define		INACTIVE_QD_PIPES_MASK				0x0000FF00
@@ -671,4 +700,18 @@
 #       define TARGET_LINK_SPEED_MASK                     (0xf << 0)
 #       define SELECTABLE_DEEMPHASIS                      (1 << 6)
 
+/* UVD */
+#define UVD_LMI_EXT40_ADDR				0xf498
+#define UVD_VCPU_CHIP_ID				0xf4d4
+#define UVD_VCPU_CACHE_OFFSET0				0xf4d8
+#define UVD_VCPU_CACHE_SIZE0				0xf4dc
+#define UVD_VCPU_CACHE_OFFSET1				0xf4e0
+#define UVD_VCPU_CACHE_SIZE1				0xf4e4
+#define UVD_VCPU_CACHE_OFFSET2				0xf4e8
+#define UVD_VCPU_CACHE_SIZE2				0xf4ec
+#define UVD_LMI_ADDR_EXT				0xf594
+
+#define UVD_RBC_RB_RPTR					0xf690
+#define UVD_RBC_RB_WPTR					0xf694
+
 #endif
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index bafbe32..aa2c555 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1211,6 +1211,7 @@
 				gb_tile_moden = 0;
 				break;
 			}
+			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
 		}
 	} else if ((rdev->family == CHIP_VERDE) ||
@@ -1451,6 +1452,7 @@
 				gb_tile_moden = 0;
 				break;
 			}
+			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
 		}
 	} else
@@ -1765,9 +1767,13 @@
 
 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
+	WREG32(DMIF_ADDR_CALC, gb_addr_config);
 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
+	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
+	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
+	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
 
 	si_tiling_mode_table_init(rdev);
 
@@ -2538,46 +2544,6 @@
 	rv515_vga_render_disable(rdev);
 }
 
-/* SI MC address space is 40 bits */
-static void si_vram_location(struct radeon_device *rdev,
-			     struct radeon_mc *mc, u64 base)
-{
-	mc->vram_start = base;
-	if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
-		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
-		mc->real_vram_size = mc->aper_size;
-		mc->mc_vram_size = mc->aper_size;
-	}
-	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
-	dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
-			mc->mc_vram_size >> 20, mc->vram_start,
-			mc->vram_end, mc->real_vram_size >> 20);
-}
-
-static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
-{
-	u64 size_af, size_bf;
-
-	size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
-	size_bf = mc->vram_start & ~mc->gtt_base_align;
-	if (size_bf > size_af) {
-		if (mc->gtt_size > size_bf) {
-			dev_warn(rdev->dev, "limiting GTT\n");
-			mc->gtt_size = size_bf;
-		}
-		mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
-	} else {
-		if (mc->gtt_size > size_af) {
-			dev_warn(rdev->dev, "limiting GTT\n");
-			mc->gtt_size = size_af;
-		}
-		mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
-	}
-	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
-	dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
-			mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
-}
-
 static void si_vram_gtt_location(struct radeon_device *rdev,
 				 struct radeon_mc *mc)
 {
@@ -2587,9 +2553,9 @@
 		mc->real_vram_size = 0xFFC0000000ULL;
 		mc->mc_vram_size = 0xFFC0000000ULL;
 	}
-	si_vram_location(rdev, &rdev->mc, 0);
+	radeon_vram_location(rdev, &rdev->mc, 0);
 	rdev->mc.gtt_base_align = 0;
-	si_gtt_location(rdev, mc);
+	radeon_gtt_location(rdev, mc);
 }
 
 static int si_mc_init(struct radeon_device *rdev)
@@ -4322,14 +4288,6 @@
 		return r;
 	si_gpu_init(rdev);
 
-#if 0
-	r = evergreen_blit_init(rdev);
-	if (r) {
-		r600_blit_fini(rdev);
-		rdev->asic->copy = NULL;
-		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-	}
-#endif
 	/* allocate rlc buffers */
 	r = si_rlc_init(rdev);
 	if (r) {
@@ -4372,6 +4330,16 @@
 		return r;
 	}
 
+	r = rv770_uvd_resume(rdev);
+	if (!r) {
+		r = radeon_fence_driver_start_ring(rdev,
+						   R600_RING_TYPE_UVD_INDEX);
+		if (r)
+			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+	}
+	if (r)
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
 	/* Enable IRQ */
 	r = si_irq_init(rdev);
 	if (r) {
@@ -4429,6 +4397,18 @@
 	if (r)
 		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	if (ring->ring_size) {
+		r = radeon_ring_init(rdev, ring, ring->ring_size,
+				     R600_WB_UVD_RPTR_OFFSET,
+				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+				     0, 0xfffff, RADEON_CP_PACKET2);
+		if (!r)
+			r = r600_uvd_init(rdev);
+		if (r)
+			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -4472,6 +4452,8 @@
 	radeon_vm_manager_fini(rdev);
 	si_cp_enable(rdev, false);
 	cayman_dma_stop(rdev);
+	r600_uvd_rbc_stop(rdev);
+	radeon_uvd_suspend(rdev);
 	si_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	si_pcie_gart_disable(rdev);
@@ -4557,6 +4539,13 @@
 	ring->ring_obj = NULL;
 	r600_ring_init(rdev, ring, 64 * 1024);
 
+	r = radeon_uvd_init(rdev);
+	if (!r) {
+		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+		ring->ring_obj = NULL;
+		r600_ring_init(rdev, ring, 4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -4594,9 +4583,6 @@
 
 void si_fini(struct radeon_device *rdev)
 {
-#if 0
-	r600_blit_fini(rdev);
-#endif
 	si_cp_fini(rdev);
 	cayman_dma_fini(rdev);
 	si_irq_fini(rdev);
@@ -4605,6 +4591,7 @@
 	radeon_vm_manager_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
+	radeon_uvd_fini(rdev);
 	si_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
@@ -4634,3 +4621,170 @@
 	mutex_unlock(&rdev->gpu_clock_mutex);
 	return clock;
 }
+
+static int si_uvd_calc_post_div(unsigned target_freq,
+				unsigned vco_freq,
+				unsigned *div)
+{
+	/* target larger than vco frequency ? */
+	if (vco_freq < target_freq)
+		return -1; /* forget it */
+
+	/* Fclk = Fvco / PDIV */
+	*div = vco_freq / target_freq;
+
+	/* we always need a frequency less than or equal to the target */
+	if ((vco_freq / *div) > target_freq)
+		*div += 1;
+
+	/* dividers above 5 must be even */
+	if (*div > 5 && *div % 2)
+		*div += 1;
+
+	/* out of range ? */
+	if (*div >= 128)
+		return -1; /* forget it */
+
+	return vco_freq / *div;
+}
+
+static int si_uvd_send_upll_ctlreq(struct radeon_device *rdev)
+{
+	unsigned i;
+
+	/* assert UPLL_CTLREQ */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+	/* wait for CTLACK and CTLACK2 to get asserted */
+	for (i = 0; i < 100; ++i) {
+		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+		if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
+			break;
+		mdelay(10);
+	}
+	if (i == 100)
+		return -ETIMEDOUT;
+
+	/* deassert UPLL_CTLREQ */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+	return 0;
+}
+
+int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
+{
+	/* start off with something large */
+	int optimal_diff_score = 0x7FFFFFF;
+	unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
+	unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
+	unsigned vco_freq;
+	int r;
+
+	/* loop through vco from low to high */
+	for (vco_freq = 125000; vco_freq <= 250000; vco_freq += 100) {
+		unsigned fb_div = vco_freq / rdev->clock.spll.reference_freq * 16384;
+		int calc_clk, diff_score, diff_vclk, diff_dclk;
+		unsigned vclk_div, dclk_div;
+
+		/* fb div out of range ? */
+		if (fb_div > 0x03FFFFFF)
+			break; /* it can only get worse */
+
+		/* calc vclk with current vco freq. */
+		calc_clk = si_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
+		if (calc_clk == -1)
+			break; /* vco is too big, stop searching */
+		diff_vclk = vclk - calc_clk;
+
+		/* calc dclk with current vco freq. */
+		calc_clk = si_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
+		if (calc_clk == -1)
+			break; /* vco is too big, stop searching */
+		diff_dclk = dclk - calc_clk;
+
+		/* determine if this vco setting is better than current optimal settings */
+		diff_score = abs(diff_vclk) + abs(diff_dclk);
+		if (diff_score < optimal_diff_score) {
+			optimal_fb_div = fb_div;
+			optimal_vclk_div = vclk_div;
+			optimal_dclk_div = dclk_div;
+			optimal_vco_freq = vco_freq;
+			optimal_diff_score = diff_score;
+			if (optimal_diff_score == 0)
+				break; /* it can't get better than this */
+		}
+	}
+
+	/* set RESET_ANTI_MUX to 0 */
+	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
+
+	/* set VCO_MODE to 1 */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
+
+	/* toggle UPLL_SLEEP to 1 then back to 0 */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
+
+	/* deassert UPLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	mdelay(1);
+
+	/* bypass vclk and dclk with bclk */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
+		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	/* put PLL in bypass mode */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
+
+	r = si_uvd_send_upll_ctlreq(rdev);
+	if (r)
+		return r;
+
+	/* assert UPLL_RESET again */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
+
+	/* disable spread spectrum. */
+	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
+
+	/* set feedback divider */
+	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div), ~UPLL_FB_DIV_MASK);
+
+	/* set ref divider to 0 */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
+
+	if (optimal_vco_freq < 187500)
+		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
+	else
+		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
+
+	/* set PDIV_A and PDIV_B */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		UPLL_PDIV_A(optimal_vclk_div) | UPLL_PDIV_B(optimal_dclk_div),
+		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
+
+	/* give the PLL some time to settle */
+	mdelay(15);
+
+	/* deassert PLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	mdelay(15);
+
+	/* switch from bypass mode to normal mode */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
+
+	r = si_uvd_send_upll_ctlreq(rdev);
+	if (r)
+		return r;
+
+	/* switch VCLK and DCLK selection */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
+		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	mdelay(100);
+
+	return 0;
+}
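
The post-divider search above follows three rules that si_uvd_calc_post_div()
encodes: the resulting clock must never overshoot the target (Fclk = Fvco /
PDIV), dividers above 5 must be even, and the divider must fit the 7-bit PDIV
fields (values >= 128 are rejected). A standalone sketch of that selection
logic follows; main() and the example frequencies (in kHz) are illustrative
only and not taken from the driver:

    #include <stdio.h>

    /* mirrors the rules of si_uvd_calc_post_div(); sketch only */
    static int calc_post_div(unsigned target_freq, unsigned vco_freq,
                             unsigned *div)
    {
            if (vco_freq < target_freq)
                    return -1;                      /* can only divide down */

            *div = vco_freq / target_freq;          /* Fclk = Fvco / PDIV */

            if ((vco_freq / *div) > target_freq)    /* never overshoot */
                    *div += 1;

            if (*div > 5 && *div % 2)               /* above 5 must be even */
                    *div += 1;

            if (*div >= 128)                        /* 7-bit PDIV field */
                    return -1;

            return vco_freq / *div;
    }

    int main(void)
    {
            unsigned div;
            int fclk;

            /* 160000/30000 = 5, but 160000/5 = 32000 overshoots 30000, so
             * the divider is bumped to 6 (also even), giving 26666 kHz */
            fclk = calc_post_div(30000, 160000, &div);
            printf("%d kHz (div %u)\n", fclk, div);
            return 0;
    }

si_set_uvd_clocks() then sweeps the VCO range (125000-250000 kHz), keeps the
feedback/post-divider combination with the smallest combined vclk+dclk error,
and programs the UPLL through the bypass/reset sequence above.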
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 23fc08f..042b91d 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -29,6 +29,35 @@
 #define TAHITI_GB_ADDR_CONFIG_GOLDEN        0x12011003
 #define VERDE_GB_ADDR_CONFIG_GOLDEN         0x12010002
 
+/* discrete uvd clocks */
+#define	CG_UPLL_FUNC_CNTL				0x634
+#	define UPLL_RESET_MASK				0x00000001
+#	define UPLL_SLEEP_MASK				0x00000002
+#	define UPLL_BYPASS_EN_MASK			0x00000004
+#	define UPLL_CTLREQ_MASK				0x00000008
+#	define UPLL_VCO_MODE_MASK			0x00000600
+#	define UPLL_REF_DIV_MASK			0x001F0000
+#	define UPLL_CTLACK_MASK				0x40000000
+#	define UPLL_CTLACK2_MASK			0x80000000
+#define	CG_UPLL_FUNC_CNTL_2				0x638
+#	define UPLL_PDIV_A(x)				((x) << 0)
+#	define UPLL_PDIV_A_MASK				0x0000007F
+#	define UPLL_PDIV_B(x)				((x) << 8)
+#	define UPLL_PDIV_B_MASK				0x00007F00
+#	define VCLK_SRC_SEL(x)				((x) << 20)
+#	define VCLK_SRC_SEL_MASK			0x01F00000
+#	define DCLK_SRC_SEL(x)				((x) << 25)
+#	define DCLK_SRC_SEL_MASK			0x3E000000
+#define	CG_UPLL_FUNC_CNTL_3				0x63C
+#	define UPLL_FB_DIV(x)				((x) << 0)
+#	define UPLL_FB_DIV_MASK				0x01FFFFFF
+#define	CG_UPLL_FUNC_CNTL_4				0x644
+#	define UPLL_SPARE_ISPARE9			0x00020000
+#define	CG_UPLL_FUNC_CNTL_5				0x648
+#	define RESET_ANTI_MUX_MASK			0x00000200
+#define	CG_UPLL_SPREAD_SPECTRUM				0x650
+#	define SSEN_MASK				0x00000001
+
 #define	CG_MULT_THERMAL_STATUS					0x714
 #define		ASIC_MAX_TEMP(x)				((x) << 0)
 #define		ASIC_MAX_TEMP_MASK				0x000001ff
@@ -65,6 +94,8 @@
 
 #define DMIF_ADDR_CONFIG  				0xBD4
 
+#define DMIF_ADDR_CALC  				0xC00
+
 #define	SRBM_STATUS				        0xE50
 #define		GRBM_RQ_PENDING 			(1 << 5)
 #define		VMC_BUSY 				(1 << 8)
@@ -798,6 +829,15 @@
 #       define THREAD_TRACE_FINISH                      (55 << 0)
 
 /*
+ * UVD
+ */
+#define UVD_UDEC_ADDR_CONFIG				0xEF4C
+#define UVD_UDEC_DB_ADDR_CONFIG				0xEF50
+#define UVD_UDEC_DBW_ADDR_CONFIG			0xEF54
+#define UVD_RBC_RB_RPTR					0xF690
+#define UVD_RBC_RB_WPTR					0xF694
+
+/*
  * PM4
  */
 #define PACKET0(reg, n)	((RADEON_PACKET_TYPE0 << 30) |			\
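
The CG_UPLL_* additions pair each multi-bit field with a shift helper
(UPLL_PDIV_A(x), VCLK_SRC_SEL(x), ...) and a mask, which is what lets
si_set_uvd_clocks() update individual fields through the driver's masked
read-modify-write helper WREG32_P(reg, val, mask): keep the bits selected by
mask, replace the rest with val. A minimal sketch of that pattern, with a
plain variable standing in for the MMIO register and a made-up initial value:

    #include <stdint.h>
    #include <stdio.h>

    #define UPLL_PDIV_A(x)      ((x) << 0)
    #define UPLL_PDIV_A_MASK    0x0000007F
    #define UPLL_PDIV_B(x)      ((x) << 8)
    #define UPLL_PDIV_B_MASK    0x00007F00

    /* keep the bits selected by 'mask', replace the rest with 'val' */
    static void wreg32_p(uint32_t *reg, uint32_t val, uint32_t mask)
    {
            uint32_t tmp = *reg;

            tmp &= mask;
            tmp |= val & ~mask;
            *reg = tmp;
    }

    int main(void)
    {
            uint32_t fake_reg = 0xDEAD0000; /* pretend other fields are set */

            /* program PDIV_A=6 and PDIV_B=8 without touching other bits */
            wreg32_p(&fake_reg, UPLL_PDIV_A(6) | UPLL_PDIV_B(8),
                     ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
            printf("0x%08X\n", (unsigned)fake_reg);  /* prints 0xDEAD0806 */
            return 0;
    }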
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index eeda917..321d4ac 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -918,6 +918,7 @@
 #define RADEON_CS_RING_GFX          0
 #define RADEON_CS_RING_COMPUTE      1
 #define RADEON_CS_RING_DMA          2
+#define RADEON_CS_RING_UVD          3
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
 /* 0 = normal, + = higher priority, - = lower priority */
 
@@ -972,6 +973,13 @@
 #define RADEON_INFO_MAX_SE		0x12
 /* max SH per SE */
 #define RADEON_INFO_MAX_SH_PER_SE	0x13
+/* query whether fast fb access is enabled */
+#define RADEON_INFO_FASTFB_WORKING	0x14
+/* query if a RADEON_CS_RING_* submission is supported */
+#define RADEON_INFO_RING_WORKING	0x15
+/* SI tile mode array */
+#define RADEON_INFO_SI_TILE_MODE_ARRAY	0x16
+
 
 struct drm_radeon_info {
 	uint32_t		request;
@@ -979,4 +987,22 @@
 	uint64_t		value;
 };
 
+/* These correspond to the tile index to use; they explicitly state
+ * the API that is implicitly defined by the tile mode array.
+ */
+#define SI_TILE_MODE_COLOR_LINEAR_ALIGNED	8
+#define SI_TILE_MODE_COLOR_1D			13
+#define SI_TILE_MODE_COLOR_1D_SCANOUT		9
+#define SI_TILE_MODE_COLOR_2D_8BPP		14
+#define SI_TILE_MODE_COLOR_2D_16BPP		15
+#define SI_TILE_MODE_COLOR_2D_32BPP		16
+#define SI_TILE_MODE_COLOR_2D_64BPP		17
+#define SI_TILE_MODE_COLOR_2D_SCANOUT_16BPP	11
+#define SI_TILE_MODE_COLOR_2D_SCANOUT_32BPP	12
+#define SI_TILE_MODE_DEPTH_STENCIL_1D		4
+#define SI_TILE_MODE_DEPTH_STENCIL_2D		0
+#define SI_TILE_MODE_DEPTH_STENCIL_2D_2AA	3
+#define SI_TILE_MODE_DEPTH_STENCIL_2D_4AA	3
+#define SI_TILE_MODE_DEPTH_STENCIL_2D_8AA	2
+
 #endif
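
Userspace reaches the new tile mode array through the radeon info ioctl:
RADEON_INFO_SI_TILE_MODE_ARRAY returns the GB_TILE_MODE values (32 on SI)
that si_tiling_mode_table_init() now mirrors into
rdev->config.si.tile_mode_array, and the SI_TILE_MODE_* defines above are
indices into that array. A hedged sketch of the userspace side, assuming
libdrm's drmCommandWriteRead() and an already-open radeon fd:

    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>
    #include <radeon_drm.h>

    /* fetch the 32-entry SI tile mode array via DRM_RADEON_INFO */
    static int query_tile_modes(int fd, uint32_t tile_modes[32])
    {
            struct drm_radeon_info info;

            memset(&info, 0, sizeof(info));
            info.request = RADEON_INFO_SI_TILE_MODE_ARRAY;
            /* 'value' doubles as a user pointer for array queries */
            info.value = (uint64_t)(uintptr_t)tile_modes;

            return drmCommandWriteRead(fd, DRM_RADEON_INFO,
                                       &info, sizeof(info));
    }

A caller would then index the result, e.g.
tile_modes[SI_TILE_MODE_COLOR_2D_SCANOUT_32BPP] for the tiling configuration
of a 32bpp displayable surface.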