Merge branch 'edp-training-fixes' into drm-intel-next

Conflicts:
	drivers/gpu/drm/i915/intel_dp.c

Just whitespace change conflicts
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 7425e5c..fe39c35 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1319,6 +1319,7 @@
 #define HDMI_IDENTIFIER 0x000C03
 #define AUDIO_BLOCK	0x01
 #define VENDOR_BLOCK    0x03
+#define SPEAKER_BLOCK	0x04
 #define EDID_BASIC_AUDIO	(1 << 6)
 
 /**
@@ -1347,6 +1348,176 @@
 }
 EXPORT_SYMBOL(drm_find_cea_extension);
 
+static void
+parse_hdmi_vsdb(struct drm_connector *connector, uint8_t *db)
+{
+	connector->eld[5] |= (db[6] >> 7) << 1;  /* Supports_AI */
+
+	connector->dvi_dual = db[6] & 1;
+	connector->max_tmds_clock = db[7] * 5;
+
+	connector->latency_present[0] = db[8] >> 7;
+	connector->latency_present[1] = (db[8] >> 6) & 1;
+	connector->video_latency[0] = db[9];
+	connector->audio_latency[0] = db[10];
+	connector->video_latency[1] = db[11];
+	connector->audio_latency[1] = db[12];
+
+	DRM_LOG_KMS("HDMI: DVI dual %d, "
+		    "max TMDS clock %d, "
+		    "latency present %d %d, "
+		    "video latency %d %d, "
+		    "audio latency %d %d\n",
+		    connector->dvi_dual,
+		    connector->max_tmds_clock,
+	      (int) connector->latency_present[0],
+	      (int) connector->latency_present[1],
+		    connector->video_latency[0],
+		    connector->video_latency[1],
+		    connector->audio_latency[0],
+		    connector->audio_latency[1]);
+}
+
+static void
+monitor_name(struct detailed_timing *t, void *data)
+{
+	if (t->data.other_data.type == EDID_DETAIL_MONITOR_NAME)
+		*(u8 **)data = t->data.other_data.data.str.str;
+}
+
+/**
+ * drm_edid_to_eld - build ELD from EDID
+ * @connector: connector corresponding to the HDMI/DP sink
+ * @edid: EDID to parse
+ *
+ * Fill the ELD (EDID-Like Data) buffer for passing to the audio driver.
+ * Some ELD fields are left to the graphics driver caller:
+ * - Conn_Type
+ * - HDCP
+ * - Port_ID
+ */
+void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid)
+{
+	uint8_t *eld = connector->eld;
+	u8 *cea;
+	u8 *name;
+	u8 *db;
+	int sad_count = 0;
+	int mnl;
+	int dbl;
+
+	memset(eld, 0, sizeof(connector->eld));
+
+	cea = drm_find_cea_extension(edid);
+	if (!cea) {
+		DRM_DEBUG_KMS("ELD: no CEA Extension found\n");
+		return;
+	}
+
+	name = NULL;
+	drm_for_each_detailed_block((u8 *)edid, monitor_name, &name);
+	for (mnl = 0; name && mnl < 13; mnl++) {
+		if (name[mnl] == 0x0a)
+			break;
+		eld[20 + mnl] = name[mnl];
+	}
+	eld[4] = (cea[1] << 5) | mnl;
+	DRM_DEBUG_KMS("ELD monitor %s\n", eld + 20);
+
+	eld[0] = 2 << 3;		/* ELD version: 2 */
+
+	eld[16] = edid->mfg_id[0];
+	eld[17] = edid->mfg_id[1];
+	eld[18] = edid->prod_code[0];
+	eld[19] = edid->prod_code[1];
+
+	for (db = cea + 4; db < cea + cea[2]; db += dbl + 1) {
+		dbl = db[0] & 0x1f;
+
+		switch ((db[0] & 0xe0) >> 5) {
+		case AUDIO_BLOCK:	/* Audio Data Block, contains SADs */
+			sad_count = dbl / 3;
+			memcpy(eld + 20 + mnl, &db[1], dbl);
+			break;
+		case SPEAKER_BLOCK:	/* Speaker Allocation Data Block */
+			eld[7] = db[1];
+			break;
+		case VENDOR_BLOCK:
+			/* HDMI Vendor-Specific Data Block */
+			if (db[1] == 0x03 && db[2] == 0x0c && db[3] == 0)
+				parse_hdmi_vsdb(connector, db);
+			break;
+		default:
+			break;
+		}
+	}
+	eld[5] |= sad_count << 4;
+	eld[2] = (20 + mnl + sad_count * 3 + 3) / 4;
+
+	DRM_DEBUG_KMS("ELD size %d, SAD count %d\n", (int)eld[2], sad_count);
+}
+EXPORT_SYMBOL(drm_edid_to_eld);
+
+/**
+ * drm_av_sync_delay - HDMI/DP sink audio-video sync delay in millisecond
+ * @connector: connector associated with the HDMI/DP sink
+ * @mode: the display mode
+ */
+int drm_av_sync_delay(struct drm_connector *connector,
+		      struct drm_display_mode *mode)
+{
+	int i = !!(mode->flags & DRM_MODE_FLAG_INTERLACE);
+	int a, v;
+
+	if (!connector->latency_present[0])
+		return 0;
+	if (!connector->latency_present[1])
+		i = 0;
+
+	a = connector->audio_latency[i];
+	v = connector->video_latency[i];
+
+	/*
+	 * HDMI/DP sink doesn't support audio or video?
+	 */
+	if (a == 255 || v == 255)
+		return 0;
+
+	/*
+	 * Convert raw EDID values to millisecond.
+	 * Treat unknown latency as 0ms.
+	 */
+	if (a)
+		a = min(2 * (a - 1), 500);
+	if (v)
+		v = min(2 * (v - 1), 500);
+
+	return max(v - a, 0);
+}
+EXPORT_SYMBOL(drm_av_sync_delay);
+
+/**
+ * drm_select_eld - select one ELD from multiple HDMI/DP sinks
+ * @encoder: the encoder just changed display mode
+ * @mode: the adjusted display mode
+ *
+ * It's possible for one encoder to be associated with multiple HDMI/DP sinks.
+ * The policy is now hard coded to simply use the first HDMI/DP sink's ELD.
+ */
+struct drm_connector *drm_select_eld(struct drm_encoder *encoder,
+				     struct drm_display_mode *mode)
+{
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder == encoder && connector->eld[0])
+			return connector;
+
+	return NULL;
+}
+EXPORT_SYMBOL(drm_select_eld);
+
 /**
  * drm_detect_hdmi_monitor - detect whether monitor is hdmi.
  * @edid: monitor EDID information
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 186d62e..396e60c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -285,6 +285,94 @@
 }
 EXPORT_SYMBOL(drm_gem_handle_create);
 
+
+/**
+ * drm_gem_free_mmap_offset - release a fake mmap offset for an object
+ * @obj: obj in question
+ *
+ * This routine frees fake offsets allocated by drm_gem_create_mmap_offset().
+ */
+void
+drm_gem_free_mmap_offset(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map_list *list = &obj->map_list;
+
+	drm_ht_remove_item(&mm->offset_hash, &list->hash);
+	drm_mm_put_block(list->file_offset_node);
+	kfree(list->map);
+	list->map = NULL;
+}
+EXPORT_SYMBOL(drm_gem_free_mmap_offset);
+
+/**
+ * drm_gem_create_mmap_offset - create a fake mmap offset for an object
+ * @obj: obj in question
+ *
+ * GEM memory mapping works by handing back to userspace a fake mmap offset
+ * it can use in a subsequent mmap(2) call.  The DRM core code then looks
+ * up the object based on the offset and sets up the various memory mapping
+ * structures.
+ *
+ * This routine allocates and attaches a fake offset for @obj.
+ */
+int
+drm_gem_create_mmap_offset(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map_list *list;
+	struct drm_local_map *map;
+	int ret = 0;
+
+	/* Set the object up for mmap'ing */
+	list = &obj->map_list;
+	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
+	if (!list->map)
+		return -ENOMEM;
+
+	map = list->map;
+	map->type = _DRM_GEM;
+	map->size = obj->size;
+	map->handle = obj;
+
+	/* Get a DRM GEM mmap offset allocated... */
+	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
+			obj->size / PAGE_SIZE, 0, 0);
+
+	if (!list->file_offset_node) {
+		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
+		ret = -ENOSPC;
+		goto out_free_list;
+	}
+
+	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
+			obj->size / PAGE_SIZE, 0);
+	if (!list->file_offset_node) {
+		ret = -ENOMEM;
+		goto out_free_list;
+	}
+
+	list->hash.key = list->file_offset_node->start;
+	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
+	if (ret) {
+		DRM_ERROR("failed to add to map hash\n");
+		goto out_free_mm;
+	}
+
+	return 0;
+
+out_free_mm:
+	drm_mm_put_block(list->file_offset_node);
+out_free_list:
+	kfree(list->map);
+	list->map = NULL;
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_create_mmap_offset);
+
 /** Returns a reference to the object named by the handle. */
 struct drm_gem_object *
 drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp,
diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c
index d3e8c54..1ca799a 100644
--- a/drivers/gpu/drm/i915/dvo_ch7017.c
+++ b/drivers/gpu/drm/i915/dvo_ch7017.c
@@ -227,7 +227,7 @@
 	default:
 		DRM_DEBUG_KMS("ch701x not detected, got %d: from %s "
 			      "slave %d.\n",
-			      val, adapter->name,dvo->slave_addr);
+			      val, adapter->name, dvo->slave_addr);
 		goto fail;
 	}
 
diff --git a/drivers/gpu/drm/i915/dvo_ch7xxx.c b/drivers/gpu/drm/i915/dvo_ch7xxx.c
index 7eaa94e..4a03660 100644
--- a/drivers/gpu/drm/i915/dvo_ch7xxx.c
+++ b/drivers/gpu/drm/i915/dvo_ch7xxx.c
@@ -111,7 +111,7 @@
 /** Reads an 8 bit register */
 static bool ch7xxx_readb(struct intel_dvo_device *dvo, int addr, uint8_t *ch)
 {
-	struct ch7xxx_priv *ch7xxx= dvo->dev_priv;
+	struct ch7xxx_priv *ch7xxx = dvo->dev_priv;
 	struct i2c_adapter *adapter = dvo->i2c_bus;
 	u8 out_buf[2];
 	u8 in_buf[2];
@@ -303,7 +303,7 @@
 
 	for (i = 0; i < CH7xxx_NUM_REGS; i++) {
 		uint8_t val;
-		if ((i % 8) == 0 )
+		if ((i % 8) == 0)
 			DRM_LOG_KMS("\n %02X: ", i);
 		ch7xxx_readb(dvo, i, &val);
 		DRM_LOG_KMS("%02X ", val);
diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/dvo_ivch.c
index a12ed94..04f2893 100644
--- a/drivers/gpu/drm/i915/dvo_ivch.c
+++ b/drivers/gpu/drm/i915/dvo_ivch.c
@@ -344,8 +344,8 @@
 			   (adjusted_mode->hdisplay - 1)) >> 2;
 		y_ratio = (((mode->vdisplay - 1) << 16) /
 			   (adjusted_mode->vdisplay - 1)) >> 2;
-		ivch_write (dvo, VR42, x_ratio);
-		ivch_write (dvo, VR41, y_ratio);
+		ivch_write(dvo, VR42, x_ratio);
+		ivch_write(dvo, VR41, y_ratio);
 	} else {
 		vr01 &= ~VR01_PANEL_FIT_ENABLE;
 		vr40 &= ~VR40_CLOCK_GATING_ENABLE;
@@ -410,7 +410,7 @@
 	}
 }
 
-struct intel_dvo_dev_ops ivch_ops= {
+struct intel_dvo_dev_ops ivch_ops = {
 	.init = ivch_init,
 	.dpms = ivch_dpms,
 	.mode_valid = ivch_mode_valid,
diff --git a/drivers/gpu/drm/i915/dvo_sil164.c b/drivers/gpu/drm/i915/dvo_sil164.c
index e4b4091..a0b13a6 100644
--- a/drivers/gpu/drm/i915/dvo_sil164.c
+++ b/drivers/gpu/drm/i915/dvo_sil164.c
@@ -104,7 +104,7 @@
 
 static bool sil164_writeb(struct intel_dvo_device *dvo, int addr, uint8_t ch)
 {
-	struct sil164_priv *sil= dvo->dev_priv;
+	struct sil164_priv *sil = dvo->dev_priv;
 	struct i2c_adapter *adapter = dvo->i2c_bus;
 	uint8_t out_buf[2];
 	struct i2c_msg msg = {
diff --git a/drivers/gpu/drm/i915/dvo_tfp410.c b/drivers/gpu/drm/i915/dvo_tfp410.c
index 8ab2855..aa2cd3e 100644
--- a/drivers/gpu/drm/i915/dvo_tfp410.c
+++ b/drivers/gpu/drm/i915/dvo_tfp410.c
@@ -56,7 +56,7 @@
 #define TFP410_CTL_2_MDI	(1<<0)
 
 #define TFP410_CTL_3		0x0A
-#define TFP410_CTL_3_DK_MASK 	(0x7<<5)
+#define TFP410_CTL_3_DK_MASK	(0x7<<5)
 #define TFP410_CTL_3_DK		(1<<5)
 #define TFP410_CTL_3_DKEN	(1<<4)
 #define TFP410_CTL_3_CTL_MASK	(0x7<<1)
@@ -225,12 +225,12 @@
 			    struct drm_display_mode *mode,
 			    struct drm_display_mode *adjusted_mode)
 {
-    /* As long as the basics are set up, since we don't have clock dependencies
-     * in the mode setup, we can just leave the registers alone and everything
-     * will work fine.
-     */
-    /* don't do much */
-    return;
+	/* As long as the basics are set up, since we don't have clock dependencies
+	* in the mode setup, we can just leave the registers alone and everything
+	* will work fine.
+	*/
+	/* don't do much */
+	return;
 }
 
 /* set the tfp410 power state */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3c395a5..8e95d66 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -98,12 +98,12 @@
 
 static const char *get_tiling_flag(struct drm_i915_gem_object *obj)
 {
-    switch (obj->tiling_mode) {
-    default:
-    case I915_TILING_NONE: return " ";
-    case I915_TILING_X: return "X";
-    case I915_TILING_Y: return "Y";
-    }
+	switch (obj->tiling_mode) {
+	default:
+	case I915_TILING_NONE: return " ";
+	case I915_TILING_X: return "X";
+	case I915_TILING_Y: return "Y";
+	}
 }
 
 static const char *cache_level_str(int type)
@@ -217,7 +217,7 @@
 			++mappable_count; \
 		} \
 	} \
-} while(0)
+} while (0)
 
 static int i915_gem_object_info(struct seq_file *m, void* data)
 {
@@ -1293,12 +1293,12 @@
 	char buf[80];
 	int len;
 
-	len = snprintf(buf, sizeof (buf),
+	len = snprintf(buf, sizeof(buf),
 		       "wedged :  %d\n",
 		       atomic_read(&dev_priv->mm.wedged));
 
-	if (len > sizeof (buf))
-		len = sizeof (buf);
+	if (len > sizeof(buf))
+		len = sizeof(buf);
 
 	return simple_read_from_buffer(ubuf, max, ppos, buf, len);
 }
@@ -1314,7 +1314,7 @@
 	int val = 1;
 
 	if (cnt > 0) {
-		if (cnt > sizeof (buf) - 1)
+		if (cnt > sizeof(buf) - 1)
 			return -EINVAL;
 
 		if (copy_from_user(buf, ubuf, cnt))
@@ -1357,11 +1357,11 @@
 	char buf[80];
 	int len;
 
-	len = snprintf(buf, sizeof (buf),
+	len = snprintf(buf, sizeof(buf),
 		       "max freq: %d\n", dev_priv->max_delay * 50);
 
-	if (len > sizeof (buf))
-		len = sizeof (buf);
+	if (len > sizeof(buf))
+		len = sizeof(buf);
 
 	return simple_read_from_buffer(ubuf, max, ppos, buf, len);
 }
@@ -1378,7 +1378,7 @@
 	int val = 1;
 
 	if (cnt > 0) {
-		if (cnt > sizeof (buf) - 1)
+		if (cnt > sizeof(buf) - 1)
 			return -EINVAL;
 
 		if (copy_from_user(buf, ubuf, cnt))
@@ -1432,12 +1432,12 @@
 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 	mutex_unlock(&dev_priv->dev->struct_mutex);
 
-	len = snprintf(buf, sizeof (buf),
+	len = snprintf(buf, sizeof(buf),
 		       "%d\n", (snpcr & GEN6_MBC_SNPCR_MASK) >>
 		       GEN6_MBC_SNPCR_SHIFT);
 
-	if (len > sizeof (buf))
-		len = sizeof (buf);
+	if (len > sizeof(buf))
+		len = sizeof(buf);
 
 	return simple_read_from_buffer(ubuf, max, ppos, buf, len);
 }
@@ -1455,7 +1455,7 @@
 	int val = 1;
 
 	if (cnt > 0) {
-		if (cnt > sizeof (buf) - 1)
+		if (cnt > sizeof(buf) - 1)
 			return -EINVAL;
 
 		if (copy_from_user(buf, ubuf, cnt))
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8a3942c..d76da38 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -884,7 +884,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	dev_priv->bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0,0));
+	dev_priv->bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
 	if (!dev_priv->bridge_dev) {
 		DRM_ERROR("bridge device not found\n");
 		return -1;
@@ -1730,10 +1730,10 @@
  */
 unsigned long i915_read_mch_val(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	unsigned long chipset_val, graphics_val, ret = 0;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev)
 		goto out_unlock;
 	dev_priv = i915_mch_dev;
@@ -1744,9 +1744,9 @@
 	ret = chipset_val + graphics_val;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
@@ -1757,10 +1757,10 @@
  */
 bool i915_gpu_raise(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	bool ret = true;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev) {
 		ret = false;
 		goto out_unlock;
@@ -1771,9 +1771,9 @@
 		dev_priv->max_delay--;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
@@ -1785,10 +1785,10 @@
  */
 bool i915_gpu_lower(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	bool ret = true;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev) {
 		ret = false;
 		goto out_unlock;
@@ -1799,9 +1799,9 @@
 		dev_priv->max_delay++;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_lower);
 
@@ -1812,10 +1812,10 @@
  */
 bool i915_gpu_busy(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	bool ret = false;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev)
 		goto out_unlock;
 	dev_priv = i915_mch_dev;
@@ -1823,9 +1823,9 @@
 	ret = dev_priv->busy;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_busy);
 
@@ -1837,10 +1837,10 @@
  */
 bool i915_gpu_turbo_disable(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	bool ret = true;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev) {
 		ret = false;
 		goto out_unlock;
@@ -1853,9 +1853,9 @@
 		ret = false;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
 
@@ -1948,7 +1948,7 @@
 
 	agp_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
 
-        dev_priv->mm.gtt_mapping =
+	dev_priv->mm.gtt_mapping =
 		io_mapping_create_wc(dev->agp->base, agp_size);
 	if (dev_priv->mm.gtt_mapping == NULL) {
 		ret = -EIO;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f07e425..c96b019 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -294,7 +294,7 @@
 #define INTEL_PCH_CPT_DEVICE_ID_TYPE	0x1c00
 #define INTEL_PCH_PPT_DEVICE_ID_TYPE	0x1e00
 
-void intel_detect_pch (struct drm_device *dev)
+void intel_detect_pch(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct pci_dev *pch;
@@ -377,7 +377,7 @@
 
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
-	if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES ) {
+	if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
 		int loop = 500;
 		u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
 		while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) {
@@ -770,12 +770,12 @@
 }
 
 static const struct dev_pm_ops i915_pm_ops = {
-     .suspend = i915_pm_suspend,
-     .resume = i915_pm_resume,
-     .freeze = i915_pm_freeze,
-     .thaw = i915_pm_thaw,
-     .poweroff = i915_pm_poweroff,
-     .restore = i915_pm_resume,
+	.suspend = i915_pm_suspend,
+	.resume = i915_pm_resume,
+	.freeze = i915_pm_freeze,
+	.thaw = i915_pm_thaw,
+	.poweroff = i915_pm_poweroff,
+	.restore = i915_pm_resume,
 };
 
 static struct vm_operations_struct i915_gem_vm_ops = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e6dd19e..b2ac202 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -209,6 +209,8 @@
 			     struct drm_display_mode *adjusted_mode,
 			     int x, int y,
 			     struct drm_framebuffer *old_fb);
+	void (*write_eld)(struct drm_connector *connector,
+			  struct drm_crtc *crtc);
 	void (*fdi_link_train)(struct drm_crtc *crtc);
 	void (*init_clock_gating)(struct drm_device *dev);
 	void (*init_pch_clock_gating)(struct drm_device *dev);
@@ -226,26 +228,26 @@
 
 struct intel_device_info {
 	u8 gen;
-	u8 is_mobile : 1;
-	u8 is_i85x : 1;
-	u8 is_i915g : 1;
-	u8 is_i945gm : 1;
-	u8 is_g33 : 1;
-	u8 need_gfx_hws : 1;
-	u8 is_g4x : 1;
-	u8 is_pineview : 1;
-	u8 is_broadwater : 1;
-	u8 is_crestline : 1;
-	u8 is_ivybridge : 1;
-	u8 has_fbc : 1;
-	u8 has_pipe_cxsr : 1;
-	u8 has_hotplug : 1;
-	u8 cursor_needs_physical : 1;
-	u8 has_overlay : 1;
-	u8 overlay_needs_physical : 1;
-	u8 supports_tv : 1;
-	u8 has_bsd_ring : 1;
-	u8 has_blt_ring : 1;
+	u8 is_mobile:1;
+	u8 is_i85x:1;
+	u8 is_i915g:1;
+	u8 is_i945gm:1;
+	u8 is_g33:1;
+	u8 need_gfx_hws:1;
+	u8 is_g4x:1;
+	u8 is_pineview:1;
+	u8 is_broadwater:1;
+	u8 is_crestline:1;
+	u8 is_ivybridge:1;
+	u8 has_fbc:1;
+	u8 has_pipe_cxsr:1;
+	u8 has_hotplug:1;
+	u8 cursor_needs_physical:1;
+	u8 has_overlay:1;
+	u8 overlay_needs_physical:1;
+	u8 supports_tv:1;
+	u8 has_bsd_ring:1;
+	u8 has_blt_ring:1;
 };
 
 enum no_fbc_reason {
@@ -757,19 +759,19 @@
 	 * (has pending rendering), and is not set if it's on inactive (ready
 	 * to be unbound).
 	 */
-	unsigned int active : 1;
+	unsigned int active:1;
 
 	/**
 	 * This is set if the object has been written to since last bound
 	 * to the GTT
 	 */
-	unsigned int dirty : 1;
+	unsigned int dirty:1;
 
 	/**
 	 * This is set if the object has been written to since the last
 	 * GPU flush.
 	 */
-	unsigned int pending_gpu_write : 1;
+	unsigned int pending_gpu_write:1;
 
 	/**
 	 * Fence register bits (if any) for this object.  Will be set
@@ -778,18 +780,18 @@
 	 *
 	 * Size: 4 bits for 16 fences + sign (for FENCE_REG_NONE)
 	 */
-	signed int fence_reg : 5;
+	signed int fence_reg:5;
 
 	/**
 	 * Advice: are the backing pages purgeable?
 	 */
-	unsigned int madv : 2;
+	unsigned int madv:2;
 
 	/**
 	 * Current tiling mode for the object.
 	 */
-	unsigned int tiling_mode : 2;
-	unsigned int tiling_changed : 1;
+	unsigned int tiling_mode:2;
+	unsigned int tiling_changed:1;
 
 	/** How many users have pinned this object in GTT space. The following
 	 * users can each hold at most one reference: pwrite/pread, pin_ioctl
@@ -800,22 +802,22 @@
 	 *
 	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
 	 * bits with absolutely no headroom. So use 4 bits. */
-	unsigned int pin_count : 4;
+	unsigned int pin_count:4;
 #define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
 
 	/**
 	 * Is the object at the current location in the gtt mappable and
 	 * fenceable? Used to avoid costly recalculations.
 	 */
-	unsigned int map_and_fenceable : 1;
+	unsigned int map_and_fenceable:1;
 
 	/**
 	 * Whether the current gtt mapping needs to be mappable (and isn't just
 	 * mappable by accident). Track pin and fault separate for a more
 	 * accurate mappable working set.
 	 */
-	unsigned int fault_mappable : 1;
-	unsigned int pin_mappable : 1;
+	unsigned int fault_mappable:1;
+	unsigned int pin_mappable:1;
 
 	/*
 	 * Is the GPU currently using a fence to access this buffer,
@@ -1054,7 +1056,7 @@
 void
 i915_disable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
 
-void intel_enable_asle (struct drm_device *dev);
+void intel_enable_asle(struct drm_device *dev);
 
 #ifdef CONFIG_DEBUG_FS
 extern void i915_destroy_error_state(struct drm_device *dev);
@@ -1144,7 +1146,7 @@
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
 int i915_gem_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
-			  uint32_t handle);			  
+			  uint32_t handle);
 /**
  * Returns true if seq1 is later than seq2.
  */
@@ -1301,8 +1303,8 @@
 extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
 extern void ironlake_enable_rc6(struct drm_device *dev);
 extern void gen6_set_rps(struct drm_device *dev, u8 val);
-extern void intel_detect_pch (struct drm_device *dev);
-extern int intel_trans_dp_port_sel (struct drm_crtc *crtc);
+extern void intel_detect_pch(struct drm_device *dev);
+extern int intel_trans_dp_port_sel(struct drm_crtc *crtc);
 
 /* overlay */
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a546a71..f0f885f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -179,7 +179,7 @@
 	mutex_unlock(&dev->struct_mutex);
 
 	args->aper_size = dev_priv->mm.gtt_total;
-	args->aper_available_size = args->aper_size -pinned;
+	args->aper_available_size = args->aper_size - pinned;
 
 	return 0;
 }
@@ -1265,74 +1265,6 @@
 }
 
 /**
- * i915_gem_create_mmap_offset - create a fake mmap offset for an object
- * @obj: obj in question
- *
- * GEM memory mapping works by handing back to userspace a fake mmap offset
- * it can use in a subsequent mmap(2) call.  The DRM core code then looks
- * up the object based on the offset and sets up the various memory mapping
- * structures.
- *
- * This routine allocates and attaches a fake offset for @obj.
- */
-static int
-i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	struct drm_gem_mm *mm = dev->mm_private;
-	struct drm_map_list *list;
-	struct drm_local_map *map;
-	int ret = 0;
-
-	/* Set the object up for mmap'ing */
-	list = &obj->base.map_list;
-	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
-	if (!list->map)
-		return -ENOMEM;
-
-	map = list->map;
-	map->type = _DRM_GEM;
-	map->size = obj->base.size;
-	map->handle = obj;
-
-	/* Get a DRM GEM mmap offset allocated... */
-	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
-						    obj->base.size / PAGE_SIZE,
-						    0, 0);
-	if (!list->file_offset_node) {
-		DRM_ERROR("failed to allocate offset for bo %d\n",
-			  obj->base.name);
-		ret = -ENOSPC;
-		goto out_free_list;
-	}
-
-	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
-						  obj->base.size / PAGE_SIZE,
-						  0);
-	if (!list->file_offset_node) {
-		ret = -ENOMEM;
-		goto out_free_list;
-	}
-
-	list->hash.key = list->file_offset_node->start;
-	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
-	if (ret) {
-		DRM_ERROR("failed to add to map hash\n");
-		goto out_free_mm;
-	}
-
-	return 0;
-
-out_free_mm:
-	drm_mm_put_block(list->file_offset_node);
-out_free_list:
-	kfree(list->map);
-	list->map = NULL;
-
-	return ret;
-}
-
-/**
  * i915_gem_release_mmap - remove physical page mappings
  * @obj: obj in question
  *
@@ -1360,19 +1292,6 @@
 	obj->fault_mappable = false;
 }
 
-static void
-i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	struct drm_gem_mm *mm = dev->mm_private;
-	struct drm_map_list *list = &obj->base.map_list;
-
-	drm_ht_remove_item(&mm->offset_hash, &list->hash);
-	drm_mm_put_block(list->file_offset_node);
-	kfree(list->map);
-	list->map = NULL;
-}
-
 static uint32_t
 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
 {
@@ -1485,7 +1404,7 @@
 	}
 
 	if (!obj->base.map_list.map) {
-		ret = i915_gem_create_mmap_offset(obj);
+		ret = drm_gem_create_mmap_offset(&obj->base);
 		if (ret)
 			goto out;
 	}
@@ -1856,7 +1775,7 @@
 	 * lost bo to the inactive list.
 	 */
 	while (!list_empty(&dev_priv->mm.flushing_list)) {
-		obj= list_first_entry(&dev_priv->mm.flushing_list,
+		obj = list_first_entry(&dev_priv->mm.flushing_list,
 				      struct drm_i915_gem_object,
 				      mm_list);
 
@@ -1922,7 +1841,7 @@
 	while (!list_empty(&ring->active_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj= list_first_entry(&ring->active_list,
+		obj = list_first_entry(&ring->active_list,
 				      struct drm_i915_gem_object,
 				      ring_list);
 
@@ -2882,7 +2801,7 @@
 
 	fenceable =
 		obj->gtt_space->size == fence_size &&
-		(obj->gtt_space->start & (fence_alignment -1)) == 0;
+		(obj->gtt_space->start & (fence_alignment - 1)) == 0;
 
 	mappable =
 		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
@@ -3598,7 +3517,7 @@
 			 */
 			request = kzalloc(sizeof(*request), GFP_KERNEL);
 			if (request)
-				ret = i915_add_request(obj->ring, NULL,request);
+				ret = i915_add_request(obj->ring, NULL, request);
 			else
 				ret = -ENOMEM;
 		}
@@ -3623,7 +3542,7 @@
 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
 {
-    return i915_gem_ring_throttle(dev, file_priv);
+	return i915_gem_ring_throttle(dev, file_priv);
 }
 
 int
@@ -3752,7 +3671,7 @@
 	trace_i915_gem_object_destroy(obj);
 
 	if (obj->base.map_list.map)
-		i915_gem_free_mmap_offset(obj);
+		drm_gem_free_mmap_offset(&obj->base);
 
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c
index 8da1899..ac90875 100644
--- a/drivers/gpu/drm/i915/i915_gem_debug.c
+++ b/drivers/gpu/drm/i915/i915_gem_debug.c
@@ -72,7 +72,7 @@
 			break;
 		} else if (!obj->active ||
 			   (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0 ||
-			   list_empty(&obj->gpu_write_list)){
+			   list_empty(&obj->gpu_write_list)) {
 			DRM_ERROR("invalid flushing %p (a %d w %x gwl %d)\n",
 				  obj,
 				  obj->active,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index da05a26..ead5d00 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -122,7 +122,7 @@
 			goto found;
 	}
 	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
-		if (! obj->base.write_domain || obj->pin_count)
+		if (!obj->base.write_domain || obj->pin_count)
 			continue;
 
 		if (mark_free(obj, &unwind_list))
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4934cf8..3693e83 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -784,7 +784,8 @@
 	}
 
 	from->sync_seqno[idx] = seqno;
-	return intel_ring_sync(to, from, seqno - 1);
+
+	return to->sync_to(to, from, seqno - 1);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index adeab2a..944d712 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -711,7 +711,7 @@
 
 	page_count = src->base.size / PAGE_SIZE;
 
-	dst = kmalloc(sizeof(*dst) + page_count * sizeof (u32 *), GFP_ATOMIC);
+	dst = kmalloc(sizeof(*dst) + page_count * sizeof(u32 *), GFP_ATOMIC);
 	if (dst == NULL)
 		return NULL;
 
@@ -1493,7 +1493,7 @@
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
 	ironlake_enable_display_irq(dev_priv, (pipe == 0) ?
-				    DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
+				    DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 
 	return 0;
@@ -1541,7 +1541,7 @@
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
 	ironlake_disable_display_irq(dev_priv, (pipe == 0) ?
-				     DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
+				     DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_mem.c b/drivers/gpu/drm/i915/i915_mem.c
index 83b7b81..cc8f6d4 100644
--- a/drivers/gpu/drm/i915/i915_mem.c
+++ b/drivers/gpu/drm/i915/i915_mem.c
@@ -202,7 +202,7 @@
 	blocks->next = blocks->prev = *heap;
 
 	memset(*heap, 0, sizeof(**heap));
-	(*heap)->file_priv = (struct drm_file *) - 1;
+	(*heap)->file_priv = (struct drm_file *) -1;
 	(*heap)->next = (*heap)->prev = blocks;
 	return 0;
 }
@@ -359,19 +359,19 @@
 	return init_heap(heap, initheap->start, initheap->size);
 }
 
-int i915_mem_destroy_heap( struct drm_device *dev, void *data,
-			   struct drm_file *file_priv )
+int i915_mem_destroy_heap(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	drm_i915_mem_destroy_heap_t *destroyheap = data;
 	struct mem_block **heap;
 
-	if ( !dev_priv ) {
-		DRM_ERROR( "called with no initialization\n" );
+	if (!dev_priv) {
+		DRM_ERROR("called with no initialization\n");
 		return -EINVAL;
 	}
 
-	heap = get_heap( dev_priv, destroyheap->region );
+	heap = get_heap(dev_priv, destroyheap->region);
 	if (!heap) {
 		DRM_ERROR("get_heap failed");
 		return -EFAULT;
@@ -382,6 +382,6 @@
 		return -EFAULT;
 	}
 
-	i915_mem_takedown( heap );
+	i915_mem_takedown(heap);
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 793cae7..28a313a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -156,7 +156,7 @@
 #define MI_SUSPEND_FLUSH	MI_INSTR(0x0b, 0)
 #define   MI_SUSPEND_FLUSH_EN	(1<<0)
 #define MI_REPORT_HEAD		MI_INSTR(0x07, 0)
-#define MI_OVERLAY_FLIP		MI_INSTR(0x11,0)
+#define MI_OVERLAY_FLIP		MI_INSTR(0x11, 0)
 #define   MI_OVERLAY_CONTINUE	(0x0<<21)
 #define   MI_OVERLAY_ON		(0x1<<21)
 #define   MI_OVERLAY_OFF	(0x2<<21)
@@ -194,6 +194,13 @@
 #define  MI_SEMAPHORE_UPDATE	    (1<<21)
 #define  MI_SEMAPHORE_COMPARE	    (1<<20)
 #define  MI_SEMAPHORE_REGISTER	    (1<<18)
+#define  MI_SEMAPHORE_SYNC_RV	    (2<<16)
+#define  MI_SEMAPHORE_SYNC_RB	    (0<<16)
+#define  MI_SEMAPHORE_SYNC_VR	    (0<<16)
+#define  MI_SEMAPHORE_SYNC_VB	    (2<<16)
+#define  MI_SEMAPHORE_SYNC_BR	    (2<<16)
+#define  MI_SEMAPHORE_SYNC_BV	    (0<<16)
+#define  MI_SEMAPHORE_SYNC_INVALID  (1<<0)
 /*
  * 3D instructions used by the kernel
  */
@@ -296,6 +303,12 @@
 #define RING_CTL(base)		((base)+0x3c)
 #define RING_SYNC_0(base)	((base)+0x40)
 #define RING_SYNC_1(base)	((base)+0x44)
+#define GEN6_RVSYNC (RING_SYNC_0(RENDER_RING_BASE))
+#define GEN6_RBSYNC (RING_SYNC_1(RENDER_RING_BASE))
+#define GEN6_VRSYNC (RING_SYNC_1(GEN6_BSD_RING_BASE))
+#define GEN6_VBSYNC (RING_SYNC_0(GEN6_BSD_RING_BASE))
+#define GEN6_BRSYNC (RING_SYNC_0(BLT_RING_BASE))
+#define GEN6_BVSYNC (RING_SYNC_1(BLT_RING_BASE))
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
@@ -470,7 +483,7 @@
 
 /* Enables non-sequential data reads through arbiter
  */
-#define   MI_ARB_DUAL_DATA_PHASE_DISABLE       	(1 << 9)
+#define   MI_ARB_DUAL_DATA_PHASE_DISABLE	(1 << 9)
 
 /* Disable FSB snooping of cacheable write cycles from binner/render
  * command stream
@@ -626,7 +639,7 @@
 
 #define ILK_DISPLAY_CHICKEN1	0x42000
 #define   ILK_FBCQ_DIS		(1<<22)
-#define   ILK_PABSTRETCH_DIS 	(1<<21)
+#define	  ILK_PABSTRETCH_DIS	(1<<21)
 
 
 /*
@@ -2358,7 +2371,7 @@
 
 #define DSPFW1			0x70034
 #define   DSPFW_SR_SHIFT	23
-#define   DSPFW_SR_MASK 	(0x1ff<<23)
+#define   DSPFW_SR_MASK		(0x1ff<<23)
 #define   DSPFW_CURSORB_SHIFT	16
 #define   DSPFW_CURSORB_MASK	(0x3f<<16)
 #define   DSPFW_PLANEB_SHIFT	8
@@ -3493,4 +3506,29 @@
 #define GEN6_PCODE_DATA				0x138128
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
 
+#define G4X_AUD_VID_DID			0x62020
+#define INTEL_AUDIO_DEVCL		0x808629FB
+#define INTEL_AUDIO_DEVBLC		0x80862801
+#define INTEL_AUDIO_DEVCTG		0x80862802
+
+#define G4X_AUD_CNTL_ST			0x620B4
+#define G4X_ELDV_DEVCL_DEVBLC		(1 << 13)
+#define G4X_ELDV_DEVCTG			(1 << 14)
+#define G4X_ELD_ADDR			(0xf << 5)
+#define G4X_ELD_ACK			(1 << 4)
+#define G4X_HDMIW_HDMIEDID		0x6210C
+
+#define GEN5_HDMIW_HDMIEDID_A		0xE2050
+#define GEN5_AUD_CNTL_ST_A		0xE20B4
+#define GEN5_ELD_BUFFER_SIZE		(0x1f << 10)
+#define GEN5_ELD_ADDRESS		(0x1f << 5)
+#define GEN5_ELD_ACK			(1 << 4)
+#define GEN5_AUD_CNTL_ST2		0xE20C0
+#define GEN5_ELD_VALIDB			(1 << 0)
+#define GEN5_CP_READYB			(1 << 1)
+
+#define GEN7_HDMIW_HDMIEDID_A		0xE5050
+#define GEN7_AUD_CNTRL_ST_A		0xE50B4
+#define GEN7_AUD_CNTRL_ST2		0xE50C0
+
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index f107423..f8f602d 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -60,7 +60,7 @@
 	else
 		array = dev_priv->save_palette_b;
 
-	for(i = 0; i < 256; i++)
+	for (i = 0; i < 256; i++)
 		array[i] = I915_READ(reg + (i << 2));
 }
 
@@ -82,7 +82,7 @@
 	else
 		array = dev_priv->save_palette_b;
 
-	for(i = 0; i < 256; i++)
+	for (i = 0; i < 256; i++)
 		I915_WRITE(reg + (i << 2), array[i]);
 }
 
@@ -887,10 +887,10 @@
 	mutex_lock(&dev->struct_mutex);
 
 	/* Cache mode state */
-	I915_WRITE (CACHE_MODE_0, dev_priv->saveCACHE_MODE_0 | 0xffff0000);
+	I915_WRITE(CACHE_MODE_0, dev_priv->saveCACHE_MODE_0 | 0xffff0000);
 
 	/* Memory arbitration state */
-	I915_WRITE (MI_ARB_STATE, dev_priv->saveMI_ARB_STATE | 0xffff0000);
+	I915_WRITE(MI_ARB_STATE, dev_priv->saveMI_ARB_STATE | 0xffff0000);
 
 	for (i = 0; i < 16; i++) {
 		I915_WRITE(SWF00 + (i << 2), dev_priv->saveSWF0[i]);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index d623fef..dac7bba 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -385,29 +385,29 @@
 );
 
 TRACE_EVENT(i915_reg_rw,
-           TP_PROTO(bool write, u32 reg, u64 val, int len),
+	TP_PROTO(bool write, u32 reg, u64 val, int len),
 
-           TP_ARGS(write, reg, val, len),
+	TP_ARGS(write, reg, val, len),
 
-           TP_STRUCT__entry(
-                   __field(u64, val)
-                   __field(u32, reg)
-                   __field(u16, write)
-                   __field(u16, len)
-                   ),
+	TP_STRUCT__entry(
+		__field(u64, val)
+		__field(u32, reg)
+		__field(u16, write)
+		__field(u16, len)
+		),
 
-           TP_fast_assign(
-                   __entry->val = (u64)val;
-                   __entry->reg = reg;
-                   __entry->write = write;
-                   __entry->len = len;
-                   ),
+	TP_fast_assign(
+		__entry->val = (u64)val;
+		__entry->reg = reg;
+		__entry->write = write;
+		__entry->len = len;
+		),
 
-           TP_printk("%s reg=0x%x, len=%d, val=(0x%x, 0x%x)",
-                     __entry->write ? "write" : "read",
-		     __entry->reg, __entry->len,
-		     (u32)(__entry->val & 0xffffffff),
-		     (u32)(__entry->val >> 32))
+	TP_printk("%s reg=0x%x, len=%d, val=(0x%x, 0x%x)",
+		__entry->write ? "write" : "read",
+		__entry->reg, __entry->len,
+		(u32)(__entry->val & 0xffffffff),
+		(u32)(__entry->val >> 32))
 );
 
 #endif /* _I915_TRACE_H_ */
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c
index 2cb8e0b..cb91210 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -64,7 +64,7 @@
 
 	case ACPI_TYPE_BUFFER:
 		if (obj->buffer.length == 4) {
-			result =(obj->buffer.pointer[0] |
+			result = (obj->buffer.pointer[0] |
 				(obj->buffer.pointer[1] <<  8) |
 				(obj->buffer.pointer[2] << 16) |
 				(obj->buffer.pointer[3] << 24));
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 61abef8..33378da 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -381,7 +381,7 @@
 		if (p_child->dvo_port != DEVICE_PORT_DVOB &&
 			p_child->dvo_port != DEVICE_PORT_DVOC) {
 			/* skip the incorrect SDVO port */
-			DRM_DEBUG_KMS("Incorrect SDVO port. Skip it \n");
+			DRM_DEBUG_KMS("Incorrect SDVO port. Skip it\n");
 			continue;
 		}
 		DRM_DEBUG_KMS("the SDVO device with slave addr %2x is found on"
@@ -564,7 +564,7 @@
 		count++;
 	}
 	if (!count) {
-		DRM_DEBUG_KMS("no child dev is parsed from VBT \n");
+		DRM_DEBUG_KMS("no child dev is parsed from VBT\n");
 		return;
 	}
 	dev_priv->child_dev = kzalloc(sizeof(*p_child) * count, GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index 35d2a50..dc20d6d 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -240,7 +240,7 @@
 	 * And the device num is related with the size of general definition
 	 * block. It is obtained by using the following formula:
 	 * number = (block_size - sizeof(bdb_general_definitions))/
-	 * 		sizeof(child_device_config);
+	 *	     sizeof(child_device_config);
 	 */
 	struct child_device_config devices[0];
 } __attribute__((packed));
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 0979d88..451534c 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -69,7 +69,7 @@
 	temp &= ~(ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE);
 	temp &= ~ADPA_DAC_ENABLE;
 
-	switch(mode) {
+	switch (mode) {
 	case DRM_MODE_DPMS_ON:
 		temp |= ADPA_DAC_ENABLE;
 		break;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 04411ad..8230cf5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -31,6 +31,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/vgaarb.h>
+#include <drm/drm_edid.h>
 #include "drmP.h"
 #include "intel_drv.h"
 #include "i915_drm.h"
@@ -42,39 +43,39 @@
 
 #define HAS_eDP (intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))
 
-bool intel_pipe_has_type (struct drm_crtc *crtc, int type);
+bool intel_pipe_has_type(struct drm_crtc *crtc, int type);
 static void intel_update_watermarks(struct drm_device *dev);
 static void intel_increase_pllclock(struct drm_crtc *crtc);
 static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
 
 typedef struct {
-    /* given values */
-    int n;
-    int m1, m2;
-    int p1, p2;
-    /* derived values */
-    int	dot;
-    int	vco;
-    int	m;
-    int	p;
+	/* given values */
+	int n;
+	int m1, m2;
+	int p1, p2;
+	/* derived values */
+	int	dot;
+	int	vco;
+	int	m;
+	int	p;
 } intel_clock_t;
 
 typedef struct {
-    int	min, max;
+	int	min, max;
 } intel_range_t;
 
 typedef struct {
-    int	dot_limit;
-    int	p2_slow, p2_fast;
+	int	dot_limit;
+	int	p2_slow, p2_fast;
 } intel_p2_t;
 
 #define INTEL_P2_NUM		      2
 typedef struct intel_limit intel_limit_t;
 struct intel_limit {
-    intel_range_t   dot, vco, n, m, m1, m2, p, p1;
-    intel_p2_t	    p2;
-    bool (* find_pll)(const intel_limit_t *, struct drm_crtc *,
-		      int, int, intel_clock_t *);
+	intel_range_t   dot, vco, n, m, m1, m2, p, p1;
+	intel_p2_t	    p2;
+	bool (* find_pll)(const intel_limit_t *, struct drm_crtc *,
+			int, int, intel_clock_t *);
 };
 
 /* FDI */
@@ -105,56 +106,56 @@
 }
 
 static const intel_limit_t intel_limits_i8xx_dvo = {
-        .dot = { .min = 25000, .max = 350000 },
-        .vco = { .min = 930000, .max = 1400000 },
-        .n = { .min = 3, .max = 16 },
-        .m = { .min = 96, .max = 140 },
-        .m1 = { .min = 18, .max = 26 },
-        .m2 = { .min = 6, .max = 16 },
-        .p = { .min = 4, .max = 128 },
-        .p1 = { .min = 2, .max = 33 },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 930000, .max = 1400000 },
+	.n = { .min = 3, .max = 16 },
+	.m = { .min = 96, .max = 140 },
+	.m1 = { .min = 18, .max = 26 },
+	.m2 = { .min = 6, .max = 16 },
+	.p = { .min = 4, .max = 128 },
+	.p1 = { .min = 2, .max = 33 },
 	.p2 = { .dot_limit = 165000,
 		.p2_slow = 4, .p2_fast = 2 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i8xx_lvds = {
-        .dot = { .min = 25000, .max = 350000 },
-        .vco = { .min = 930000, .max = 1400000 },
-        .n = { .min = 3, .max = 16 },
-        .m = { .min = 96, .max = 140 },
-        .m1 = { .min = 18, .max = 26 },
-        .m2 = { .min = 6, .max = 16 },
-        .p = { .min = 4, .max = 128 },
-        .p1 = { .min = 1, .max = 6 },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 930000, .max = 1400000 },
+	.n = { .min = 3, .max = 16 },
+	.m = { .min = 96, .max = 140 },
+	.m1 = { .min = 18, .max = 26 },
+	.m2 = { .min = 6, .max = 16 },
+	.p = { .min = 4, .max = 128 },
+	.p1 = { .min = 1, .max = 6 },
 	.p2 = { .dot_limit = 165000,
 		.p2_slow = 14, .p2_fast = 7 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i9xx_sdvo = {
-        .dot = { .min = 20000, .max = 400000 },
-        .vco = { .min = 1400000, .max = 2800000 },
-        .n = { .min = 1, .max = 6 },
-        .m = { .min = 70, .max = 120 },
-        .m1 = { .min = 10, .max = 22 },
-        .m2 = { .min = 5, .max = 9 },
-        .p = { .min = 5, .max = 80 },
-        .p1 = { .min = 1, .max = 8 },
+	.dot = { .min = 20000, .max = 400000 },
+	.vco = { .min = 1400000, .max = 2800000 },
+	.n = { .min = 1, .max = 6 },
+	.m = { .min = 70, .max = 120 },
+	.m1 = { .min = 10, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 5, .max = 80 },
+	.p1 = { .min = 1, .max = 8 },
 	.p2 = { .dot_limit = 200000,
 		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i9xx_lvds = {
-        .dot = { .min = 20000, .max = 400000 },
-        .vco = { .min = 1400000, .max = 2800000 },
-        .n = { .min = 1, .max = 6 },
-        .m = { .min = 70, .max = 120 },
-        .m1 = { .min = 10, .max = 22 },
-        .m2 = { .min = 5, .max = 9 },
-        .p = { .min = 7, .max = 98 },
-        .p1 = { .min = 1, .max = 8 },
+	.dot = { .min = 20000, .max = 400000 },
+	.vco = { .min = 1400000, .max = 2800000 },
+	.n = { .min = 1, .max = 6 },
+	.m = { .min = 70, .max = 120 },
+	.m1 = { .min = 10, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 7, .max = 98 },
+	.p1 = { .min = 1, .max = 8 },
 	.p2 = { .dot_limit = 112000,
 		.p2_slow = 14, .p2_fast = 7 },
 	.find_pll = intel_find_best_PLL,
@@ -222,44 +223,44 @@
 };
 
 static const intel_limit_t intel_limits_g4x_display_port = {
-        .dot = { .min = 161670, .max = 227000 },
-        .vco = { .min = 1750000, .max = 3500000},
-        .n = { .min = 1, .max = 2 },
-        .m = { .min = 97, .max = 108 },
-        .m1 = { .min = 0x10, .max = 0x12 },
-        .m2 = { .min = 0x05, .max = 0x06 },
-        .p = { .min = 10, .max = 20 },
-        .p1 = { .min = 1, .max = 2},
-        .p2 = { .dot_limit = 0,
+	.dot = { .min = 161670, .max = 227000 },
+	.vco = { .min = 1750000, .max = 3500000},
+	.n = { .min = 1, .max = 2 },
+	.m = { .min = 97, .max = 108 },
+	.m1 = { .min = 0x10, .max = 0x12 },
+	.m2 = { .min = 0x05, .max = 0x06 },
+	.p = { .min = 10, .max = 20 },
+	.p1 = { .min = 1, .max = 2},
+	.p2 = { .dot_limit = 0,
 		.p2_slow = 10, .p2_fast = 10 },
-        .find_pll = intel_find_pll_g4x_dp,
+	.find_pll = intel_find_pll_g4x_dp,
 };
 
 static const intel_limit_t intel_limits_pineview_sdvo = {
-        .dot = { .min = 20000, .max = 400000},
-        .vco = { .min = 1700000, .max = 3500000 },
+	.dot = { .min = 20000, .max = 400000},
+	.vco = { .min = 1700000, .max = 3500000 },
 	/* Pineview's Ncounter is a ring counter */
-        .n = { .min = 3, .max = 6 },
-        .m = { .min = 2, .max = 256 },
+	.n = { .min = 3, .max = 6 },
+	.m = { .min = 2, .max = 256 },
 	/* Pineview only has one combined m divider, which we treat as m2. */
-        .m1 = { .min = 0, .max = 0 },
-        .m2 = { .min = 0, .max = 254 },
-        .p = { .min = 5, .max = 80 },
-        .p1 = { .min = 1, .max = 8 },
+	.m1 = { .min = 0, .max = 0 },
+	.m2 = { .min = 0, .max = 254 },
+	.p = { .min = 5, .max = 80 },
+	.p1 = { .min = 1, .max = 8 },
 	.p2 = { .dot_limit = 200000,
 		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_pineview_lvds = {
-        .dot = { .min = 20000, .max = 400000 },
-        .vco = { .min = 1700000, .max = 3500000 },
-        .n = { .min = 3, .max = 6 },
-        .m = { .min = 2, .max = 256 },
-        .m1 = { .min = 0, .max = 0 },
-        .m2 = { .min = 0, .max = 254 },
-        .p = { .min = 7, .max = 112 },
-        .p1 = { .min = 1, .max = 8 },
+	.dot = { .min = 20000, .max = 400000 },
+	.vco = { .min = 1700000, .max = 3500000 },
+	.n = { .min = 3, .max = 6 },
+	.m = { .min = 2, .max = 256 },
+	.m1 = { .min = 0, .max = 0 },
+	.m2 = { .min = 0, .max = 254 },
+	.p = { .min = 7, .max = 112 },
+	.p1 = { .min = 1, .max = 8 },
 	.p2 = { .dot_limit = 112000,
 		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_find_best_PLL,
@@ -321,7 +322,7 @@
 	.m1 = { .min = 12, .max = 22 },
 	.m2 = { .min = 5, .max = 9 },
 	.p = { .min = 28, .max = 112 },
-	.p1 = { .min = 2,.max = 8 },
+	.p1 = { .min = 2, .max = 8 },
 	.p2 = { .dot_limit = 225000,
 		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_g4x_find_best_PLL,
@@ -335,24 +336,24 @@
 	.m1 = { .min = 12, .max = 22 },
 	.m2 = { .min = 5, .max = 9 },
 	.p = { .min = 14, .max = 42 },
-	.p1 = { .min = 2,.max = 6 },
+	.p1 = { .min = 2, .max = 6 },
 	.p2 = { .dot_limit = 225000,
 		.p2_slow = 7, .p2_fast = 7 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_display_port = {
-        .dot = { .min = 25000, .max = 350000 },
-        .vco = { .min = 1760000, .max = 3510000},
-        .n = { .min = 1, .max = 2 },
-        .m = { .min = 81, .max = 90 },
-        .m1 = { .min = 12, .max = 22 },
-        .m2 = { .min = 5, .max = 9 },
-        .p = { .min = 10, .max = 20 },
-        .p1 = { .min = 1, .max = 2},
-        .p2 = { .dot_limit = 0,
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000},
+	.n = { .min = 1, .max = 2 },
+	.m = { .min = 81, .max = 90 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 10, .max = 20 },
+	.p1 = { .min = 1, .max = 2},
+	.p2 = { .dot_limit = 0,
 		.p2_slow = 10, .p2_fast = 10 },
-        .find_pll = intel_find_pll_ironlake_dp,
+	.find_pll = intel_find_pll_ironlake_dp,
 };
 
 static const intel_limit_t *intel_ironlake_limit(struct drm_crtc *crtc,
@@ -404,7 +405,7 @@
 		limit = &intel_limits_g4x_hdmi;
 	} else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO)) {
 		limit = &intel_limits_g4x_sdvo;
-	} else if (intel_pipe_has_type (crtc, INTEL_OUTPUT_DISPLAYPORT)) {
+	} else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) {
 		limit = &intel_limits_g4x_display_port;
 	} else /* The option is for other outputs */
 		limit = &intel_limits_i9xx_sdvo;
@@ -488,26 +489,26 @@
 			       const intel_clock_t *clock)
 {
 	if (clock->p1  < limit->p1.min  || limit->p1.max  < clock->p1)
-		INTELPllInvalid ("p1 out of range\n");
+		INTELPllInvalid("p1 out of range\n");
 	if (clock->p   < limit->p.min   || limit->p.max   < clock->p)
-		INTELPllInvalid ("p out of range\n");
+		INTELPllInvalid("p out of range\n");
 	if (clock->m2  < limit->m2.min  || limit->m2.max  < clock->m2)
-		INTELPllInvalid ("m2 out of range\n");
+		INTELPllInvalid("m2 out of range\n");
 	if (clock->m1  < limit->m1.min  || limit->m1.max  < clock->m1)
-		INTELPllInvalid ("m1 out of range\n");
+		INTELPllInvalid("m1 out of range\n");
 	if (clock->m1 <= clock->m2 && !IS_PINEVIEW(dev))
-		INTELPllInvalid ("m1 <= m2\n");
+		INTELPllInvalid("m1 <= m2\n");
 	if (clock->m   < limit->m.min   || limit->m.max   < clock->m)
-		INTELPllInvalid ("m out of range\n");
+		INTELPllInvalid("m out of range\n");
 	if (clock->n   < limit->n.min   || limit->n.max   < clock->n)
-		INTELPllInvalid ("n out of range\n");
+		INTELPllInvalid("n out of range\n");
 	if (clock->vco < limit->vco.min || limit->vco.max < clock->vco)
-		INTELPllInvalid ("vco out of range\n");
+		INTELPllInvalid("vco out of range\n");
 	/* XXX: We may need to be checking "Dot clock" depending on the multiplier,
 	 * connector, etc., rather than just a single range.
 	 */
 	if (clock->dot < limit->dot.min || limit->dot.max < clock->dot)
-		INTELPllInvalid ("dot out of range\n");
+		INTELPllInvalid("dot out of range\n");
 
 	return true;
 }
@@ -542,7 +543,7 @@
 			clock.p2 = limit->p2.p2_fast;
 	}
 
-	memset (best_clock, 0, sizeof (*best_clock));
+	memset(best_clock, 0, sizeof(*best_clock));
 
 	for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max;
 	     clock.m1++) {
@@ -2440,7 +2441,7 @@
 
 }
 
-static const int snb_b_fdi_train_param [] = {
+static const int snb_b_fdi_train_param[] = {
 	FDI_LINK_TRAIN_400MV_0DB_SNB_B,
 	FDI_LINK_TRAIN_400MV_6DB_SNB_B,
 	FDI_LINK_TRAIN_600MV_3_5DB_SNB_B,
@@ -2496,7 +2497,7 @@
 	if (HAS_PCH_CPT(dev))
 		cpt_phase_pointer_enable(dev, pipe);
 
-	for (i = 0; i < 4; i++ ) {
+	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
@@ -2545,7 +2546,7 @@
 	POSTING_READ(reg);
 	udelay(150);
 
-	for (i = 0; i < 4; i++ ) {
+	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
@@ -2615,7 +2616,7 @@
 	if (HAS_PCH_CPT(dev))
 		cpt_phase_pointer_enable(dev, pipe);
 
-	for (i = 0; i < 4; i++ ) {
+	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
@@ -2657,7 +2658,7 @@
 	POSTING_READ(reg);
 	udelay(150);
 
-	for (i = 0; i < 4; i++ ) {
+	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
@@ -3293,14 +3294,14 @@
 	ironlake_crtc_enable(crtc);
 }
 
-void intel_encoder_prepare (struct drm_encoder *encoder)
+void intel_encoder_prepare(struct drm_encoder *encoder)
 {
 	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
 	/* lvds has its own version of prepare see intel_lvds_prepare */
 	encoder_funcs->dpms(encoder, DRM_MODE_DPMS_OFF);
 }
 
-void intel_encoder_commit (struct drm_encoder *encoder)
+void intel_encoder_commit(struct drm_encoder *encoder)
 {
 	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
 	/* lvds has its own version of commit see intel_lvds_commit */
@@ -5677,6 +5678,131 @@
 	return ret;
 }
 
+static void g4x_write_eld(struct drm_connector *connector,
+			  struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = connector->dev->dev_private;
+	uint8_t *eld = connector->eld;
+	uint32_t eldv;
+	uint32_t len;
+	uint32_t i;
+
+	i = I915_READ(G4X_AUD_VID_DID);
+
+	if (i == INTEL_AUDIO_DEVBLC || i == INTEL_AUDIO_DEVCL)
+		eldv = G4X_ELDV_DEVCL_DEVBLC;
+	else
+		eldv = G4X_ELDV_DEVCTG;
+
+	i = I915_READ(G4X_AUD_CNTL_ST);
+	i &= ~(eldv | G4X_ELD_ADDR);
+	len = (i >> 9) & 0x1f;		/* ELD buffer size */
+	I915_WRITE(G4X_AUD_CNTL_ST, i);
+
+	if (!eld[0])
+		return;
+
+	len = min_t(uint8_t, eld[2], len);
+	DRM_DEBUG_DRIVER("ELD size %d\n", len);
+	for (i = 0; i < len; i++)
+		I915_WRITE(G4X_HDMIW_HDMIEDID, *((uint32_t *)eld + i));
+
+	i = I915_READ(G4X_AUD_CNTL_ST);
+	i |= eldv;
+	I915_WRITE(G4X_AUD_CNTL_ST, i);
+}
+
+static void ironlake_write_eld(struct drm_connector *connector,
+				     struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = connector->dev->dev_private;
+	uint8_t *eld = connector->eld;
+	uint32_t eldv;
+	uint32_t i;
+	int len;
+	int hdmiw_hdmiedid;
+	int aud_cntl_st;
+	int aud_cntrl_st2;
+
+	if (IS_IVYBRIDGE(connector->dev)) {
+		hdmiw_hdmiedid = GEN7_HDMIW_HDMIEDID_A;
+		aud_cntl_st = GEN7_AUD_CNTRL_ST_A;
+		aud_cntrl_st2 = GEN7_AUD_CNTRL_ST2;
+	} else {
+		hdmiw_hdmiedid = GEN5_HDMIW_HDMIEDID_A;
+		aud_cntl_st = GEN5_AUD_CNTL_ST_A;
+		aud_cntrl_st2 = GEN5_AUD_CNTL_ST2;
+	}
+
+	i = to_intel_crtc(crtc)->pipe;
+	hdmiw_hdmiedid += i * 0x100;
+	aud_cntl_st += i * 0x100;
+
+	DRM_DEBUG_DRIVER("ELD on pipe %c\n", pipe_name(i));
+
+	i = I915_READ(aud_cntl_st);
+	i = (i >> 29) & 0x3;		/* DIP_Port_Select, 0x1 = PortB */
+	if (!i) {
+		DRM_DEBUG_DRIVER("Audio directed to unknown port\n");
+		/* operate blindly on all ports */
+		eldv = GEN5_ELD_VALIDB;
+		eldv |= GEN5_ELD_VALIDB << 4;
+		eldv |= GEN5_ELD_VALIDB << 8;
+	} else {
+		DRM_DEBUG_DRIVER("ELD on port %c\n", 'A' + i);
+		eldv = GEN5_ELD_VALIDB << ((i - 1) * 4);
+	}
+
+	i = I915_READ(aud_cntrl_st2);
+	i &= ~eldv;
+	I915_WRITE(aud_cntrl_st2, i);
+
+	if (!eld[0])
+		return;
+
+	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) {
+		DRM_DEBUG_DRIVER("ELD: DisplayPort detected\n");
+		eld[5] |= (1 << 2);	/* Conn_Type, 0x1 = DisplayPort */
+	}
+
+	i = I915_READ(aud_cntl_st);
+	i &= ~GEN5_ELD_ADDRESS;
+	I915_WRITE(aud_cntl_st, i);
+
+	len = min_t(uint8_t, eld[2], 21);	/* 84 bytes of hw ELD buffer */
+	DRM_DEBUG_DRIVER("ELD size %d\n", len);
+	for (i = 0; i < len; i++)
+		I915_WRITE(hdmiw_hdmiedid, *((uint32_t *)eld + i));
+
+	i = I915_READ(aud_cntrl_st2);
+	i |= eldv;
+	I915_WRITE(aud_cntrl_st2, i);
+}
+
+void intel_write_eld(struct drm_encoder *encoder,
+		     struct drm_display_mode *mode)
+{
+	struct drm_crtc *crtc = encoder->crtc;
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	connector = drm_select_eld(encoder, mode);
+	if (!connector)
+		return;
+
+	DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
+			 connector->base.id,
+			 drm_get_connector_name(connector),
+			 connector->encoder->base.id,
+			 drm_get_encoder_name(connector->encoder));
+
+	connector->eld[6] = drm_av_sync_delay(connector, mode) / 2;
+
+	if (dev_priv->display.write_eld)
+		dev_priv->display.write_eld(connector, crtc);
+}
+
 /** Loads the palette/gamma unit for the CRTC with the prepared values */
 void intel_crtc_load_lut(struct drm_crtc *crtc)
 {
@@ -8154,7 +8280,7 @@
 	}
 
 	/* Returns the core display clock speed */
-	if (IS_I945G(dev) || (IS_G33(dev) && ! IS_PINEVIEW_M(dev)))
+	if (IS_I945G(dev) || (IS_G33(dev) && !IS_PINEVIEW_M(dev)))
 		dev_priv->display.get_display_clock_speed =
 			i945_get_display_clock_speed;
 	else if (IS_I915G(dev))
@@ -8193,6 +8319,7 @@
 			}
 			dev_priv->display.fdi_link_train = ironlake_fdi_link_train;
 			dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
+			dev_priv->display.write_eld = ironlake_write_eld;
 		} else if (IS_GEN6(dev)) {
 			if (SNB_READ_WM0_LATENCY()) {
 				dev_priv->display.update_wm = sandybridge_update_wm;
@@ -8203,6 +8330,7 @@
 			}
 			dev_priv->display.fdi_link_train = gen6_fdi_link_train;
 			dev_priv->display.init_clock_gating = gen6_init_clock_gating;
+			dev_priv->display.write_eld = ironlake_write_eld;
 		} else if (IS_IVYBRIDGE(dev)) {
 			/* FIXME: detect B0+ stepping and use auto training */
 			dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train;
@@ -8214,7 +8342,7 @@
 				dev_priv->display.update_wm = NULL;
 			}
 			dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
-
+			dev_priv->display.write_eld = ironlake_write_eld;
 		} else
 			dev_priv->display.update_wm = NULL;
 	} else if (IS_PINEVIEW(dev)) {
@@ -8225,7 +8353,7 @@
 			DRM_INFO("failed to find known CxSR latency "
 				 "(found ddr%s fsb freq %d, mem freq %d), "
 				 "disabling CxSR\n",
-				 (dev_priv->is_ddr3 == 1) ? "3": "2",
+				 (dev_priv->is_ddr3 == 1) ? "3" : "2",
 				 dev_priv->fsb_freq, dev_priv->mem_freq);
 			/* Disable CxSR and never update its watermark again */
 			pineview_disable_cxsr(dev);
@@ -8234,6 +8362,7 @@
 			dev_priv->display.update_wm = pineview_update_wm;
 		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
 	} else if (IS_G4X(dev)) {
+		dev_priv->display.write_eld = g4x_write_eld;
 		dev_priv->display.update_wm = g4x_update_wm;
 		dev_priv->display.init_clock_gating = g4x_init_clock_gating;
 	} else if (IS_GEN4(dev)) {
@@ -8294,7 +8423,7 @@
  * resume, or other times.  This quirk makes sure that's the case for
  * affected systems.
  */
-static void quirk_pipea_force (struct drm_device *dev)
+static void quirk_pipea_force(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -8322,7 +8451,7 @@
 	/* HP Compaq 2730p needs pipe A force quirk (LP: #291555) */
 	{ 0x2a42, 0x103c, 0x30eb, quirk_pipea_force },
 	/* HP Mini needs pipe A force quirk (LP: #322104) */
-	{ 0x27ae,0x103c, 0x361a, quirk_pipea_force },
+	{ 0x27ae, 0x103c, 0x361a, quirk_pipea_force },
 
 	/* Thinkpad R31 needs pipe A force quirk */
 	{ 0x3577, 0x1014, 0x0505, quirk_pipea_force },
@@ -8573,7 +8702,7 @@
 struct intel_display_error_state *
 intel_display_capture_error_state(struct drm_device *dev)
 {
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_display_error_state *error;
 	int i;
 
@@ -8589,7 +8718,7 @@
 		error->plane[i].control = I915_READ(DSPCNTR(i));
 		error->plane[i].stride = I915_READ(DSPSTRIDE(i));
 		error->plane[i].size = I915_READ(DSPSIZE(i));
-		error->plane[i].pos= I915_READ(DSPPOS(i));
+		error->plane[i].pos = I915_READ(DSPPOS(i));
 		error->plane[i].addr = I915_READ(DSPADDR(i));
 		if (INTEL_INFO(dev)->gen >= 4) {
 			error->plane[i].surface = I915_READ(DSPSURF(i));
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 6db2a2d..3d73374 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -130,7 +130,7 @@
 static void intel_dp_link_down(struct intel_dp *intel_dp);
 
 void
-intel_edp_link_config (struct intel_encoder *intel_encoder,
+intel_edp_link_config(struct intel_encoder *intel_encoder,
 		       int *lane_num, int *link_bw)
 {
 	struct intel_dp *intel_dp = container_of(intel_encoder, struct intel_dp, base);
@@ -377,7 +377,7 @@
 		for (i = 0; i < send_bytes; i += 4)
 			I915_WRITE(ch_data + i,
 				   pack_aux(send + i, send_bytes - i));
-	
+
 		/* Send the command and wait for it to complete */
 		I915_WRITE(ch_ctl,
 			   DP_AUX_CH_CTL_SEND_BUSY |
@@ -394,7 +394,7 @@
 				break;
 			udelay(100);
 		}
-	
+
 		/* Clear done status and any errors */
 		I915_WRITE(ch_ctl,
 			   status |
@@ -430,7 +430,7 @@
 		      DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT);
 	if (recv_bytes > recv_size)
 		recv_bytes = recv_size;
-	
+
 	for (i = 0; i < recv_bytes; i += 4)
 		unpack_aux(I915_READ(ch_data + i),
 			   recv + i, recv_bytes - i);
@@ -630,10 +630,10 @@
 	intel_dp->algo.address = 0;
 	intel_dp->algo.aux_ch = intel_dp_i2c_aux_ch;
 
-	memset(&intel_dp->adapter, '\0', sizeof (intel_dp->adapter));
+	memset(&intel_dp->adapter, '\0', sizeof(intel_dp->adapter));
 	intel_dp->adapter.owner = THIS_MODULE;
 	intel_dp->adapter.class = I2C_CLASS_DDC;
-	strncpy (intel_dp->adapter.name, name, sizeof(intel_dp->adapter.name) - 1);
+	strncpy(intel_dp->adapter.name, name, sizeof(intel_dp->adapter.name) - 1);
 	intel_dp->adapter.name[sizeof(intel_dp->adapter.name) - 1] = '\0';
 	intel_dp->adapter.algo_data = &intel_dp->algo;
 	intel_dp->adapter.dev.parent = &intel_connector->base.kdev;
@@ -834,8 +834,12 @@
 		intel_dp->DP |= DP_PORT_WIDTH_4;
 		break;
 	}
-	if (intel_dp->has_audio)
+	if (intel_dp->has_audio) {
+		DRM_DEBUG_DRIVER("Enabling DP audio on pipe %c\n",
+				 pipe_name(intel_crtc->pipe));
 		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
+		intel_write_eld(encoder, adjusted_mode);
+	}
 
 	memset(intel_dp->link_configuration, 0, DP_LINK_CONFIGURATION_SIZE);
 	intel_dp->link_configuration[0] = intel_dp->link_bw;
@@ -985,7 +989,7 @@
 }
 
 /* Returns true if the panel was already on when called */
-static void ironlake_edp_panel_on (struct intel_dp *intel_dp)
+static void ironlake_edp_panel_on(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1066,7 +1070,7 @@
 	}
 }
 
-static void ironlake_edp_backlight_on (struct intel_dp *intel_dp)
+static void ironlake_edp_backlight_on(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1091,7 +1095,7 @@
 	POSTING_READ(PCH_PP_CONTROL);
 }
 
-static void ironlake_edp_backlight_off (struct intel_dp *intel_dp)
+static void ironlake_edp_backlight_off(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1759,7 +1763,7 @@
 intel_dp_get_dpcd(struct intel_dp *intel_dp)
 {
 	if (intel_dp_aux_native_read_retry(intel_dp, 0x000, intel_dp->dpcd,
-					   sizeof (intel_dp->dpcd)) &&
+					   sizeof(intel_dp->dpcd)) &&
 	    (intel_dp->dpcd[DP_DPCD_REV] != 0)) {
 		return true;
 	}
@@ -2047,7 +2051,7 @@
 }
 
 static void
-intel_dp_destroy (struct drm_connector *connector)
+intel_dp_destroy(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 
@@ -2108,7 +2112,7 @@
 
 /* Return which DP Port should be selected for Transcoder DP control */
 int
-intel_trans_dp_port_sel (struct drm_crtc *crtc)
+intel_trans_dp_port_sel(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_mode_config *mode_config = &dev->mode_config;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index fe1099d..98044d6 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -34,7 +34,7 @@
 #define _wait_for(COND, MS, W) ({ \
 	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS);	\
 	int ret__ = 0;							\
-	while (! (COND)) {						\
+	while (!(COND)) {						\
 		if (time_after(jiffies, timeout__)) {			\
 			ret__ = -ETIMEDOUT;				\
 			break;						\
@@ -49,10 +49,10 @@
 
 #define MSLEEP(x) do { \
 	if (in_dbg_master()) \
-	       	mdelay(x); \
+		mdelay(x); \
 	else \
 		msleep(x); \
-} while(0)
+} while (0)
 
 #define KHz(x) (1000*x)
 #define MHz(x) KHz(1000*x)
@@ -284,7 +284,7 @@
 intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode,
 		 struct drm_display_mode *adjusted_mode);
 extern bool intel_dpd_is_edp(struct drm_device *dev);
-extern void intel_edp_link_config (struct intel_encoder *, int *, int *);
+extern void intel_edp_link_config(struct intel_encoder *, int *, int *);
 extern bool intel_encoder_is_pch_edp(struct drm_encoder *encoder);
 
 /* intel_panel.c */
@@ -304,8 +304,8 @@
 extern enum drm_connector_status intel_panel_detect(struct drm_device *dev);
 
 extern void intel_crtc_load_lut(struct drm_crtc *crtc);
-extern void intel_encoder_prepare (struct drm_encoder *encoder);
-extern void intel_encoder_commit (struct drm_encoder *encoder);
+extern void intel_encoder_prepare(struct drm_encoder *encoder);
+extern void intel_encoder_commit(struct drm_encoder *encoder);
 extern void intel_encoder_destroy(struct drm_encoder *encoder);
 
 static inline struct intel_encoder *intel_attached_encoder(struct drm_connector *connector)
@@ -377,4 +377,6 @@
 extern void intel_fb_restore_mode(struct drm_device *dev);
 
 extern void intel_init_clock_gating(struct drm_device *dev);
+extern void intel_write_eld(struct drm_encoder *encoder,
+			    struct drm_display_mode *mode);
 #endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 226ba83..75026ba 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -245,8 +245,11 @@
 		sdvox |= HDMI_MODE_SELECT;
 
 	if (intel_hdmi->has_audio) {
+		DRM_DEBUG_DRIVER("Enabling HDMI audio on pipe %c\n",
+				 pipe_name(intel_crtc->pipe));
 		sdvox |= SDVO_AUDIO_ENABLE;
 		sdvox |= SDVO_NULL_PACKETS_DURING_VSYNC;
+		intel_write_eld(encoder, adjusted_mode);
 	}
 
 	if (intel_crtc->pipe == 1) {
diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c
index 3b26a3b..be2c6fe 100644
--- a/drivers/gpu/drm/i915/intel_modes.c
+++ b/drivers/gpu/drm/i915/intel_modes.c
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/fb.h>
+#include <drm/drm_edid.h>
 #include "drmP.h"
 #include "intel_drv.h"
 #include "i915_drv.h"
@@ -74,6 +75,7 @@
 	if (edid) {
 		drm_mode_connector_update_edid_property(connector, edid);
 		ret = drm_add_edid_modes(connector, edid);
+		drm_edid_to_eld(connector, edid);
 		connector->display_info.raw_edid = NULL;
 		kfree(edid);
 	}
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index b8e8158b..289140b 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -51,61 +51,61 @@
 #define MBOX_ASLE      (1<<2)
 
 struct opregion_header {
-       u8 signature[16];
-       u32 size;
-       u32 opregion_ver;
-       u8 bios_ver[32];
-       u8 vbios_ver[16];
-       u8 driver_ver[16];
-       u32 mboxes;
-       u8 reserved[164];
+	u8 signature[16];
+	u32 size;
+	u32 opregion_ver;
+	u8 bios_ver[32];
+	u8 vbios_ver[16];
+	u8 driver_ver[16];
+	u32 mboxes;
+	u8 reserved[164];
 } __attribute__((packed));
 
 /* OpRegion mailbox #1: public ACPI methods */
 struct opregion_acpi {
-       u32 drdy;       /* driver readiness */
-       u32 csts;       /* notification status */
-       u32 cevt;       /* current event */
-       u8 rsvd1[20];
-       u32 didl[8];    /* supported display devices ID list */
-       u32 cpdl[8];    /* currently presented display list */
-       u32 cadl[8];    /* currently active display list */
-       u32 nadl[8];    /* next active devices list */
-       u32 aslp;       /* ASL sleep time-out */
-       u32 tidx;       /* toggle table index */
-       u32 chpd;       /* current hotplug enable indicator */
-       u32 clid;       /* current lid state*/
-       u32 cdck;       /* current docking state */
-       u32 sxsw;       /* Sx state resume */
-       u32 evts;       /* ASL supported events */
-       u32 cnot;       /* current OS notification */
-       u32 nrdy;       /* driver status */
-       u8 rsvd2[60];
+	u32 drdy;       /* driver readiness */
+	u32 csts;       /* notification status */
+	u32 cevt;       /* current event */
+	u8 rsvd1[20];
+	u32 didl[8];    /* supported display devices ID list */
+	u32 cpdl[8];    /* currently presented display list */
+	u32 cadl[8];    /* currently active display list */
+	u32 nadl[8];    /* next active devices list */
+	u32 aslp;       /* ASL sleep time-out */
+	u32 tidx;       /* toggle table index */
+	u32 chpd;       /* current hotplug enable indicator */
+	u32 clid;       /* current lid state*/
+	u32 cdck;       /* current docking state */
+	u32 sxsw;       /* Sx state resume */
+	u32 evts;       /* ASL supported events */
+	u32 cnot;       /* current OS notification */
+	u32 nrdy;       /* driver status */
+	u8 rsvd2[60];
 } __attribute__((packed));
 
 /* OpRegion mailbox #2: SWSCI */
 struct opregion_swsci {
-       u32 scic;       /* SWSCI command|status|data */
-       u32 parm;       /* command parameters */
-       u32 dslp;       /* driver sleep time-out */
-       u8 rsvd[244];
+	u32 scic;       /* SWSCI command|status|data */
+	u32 parm;       /* command parameters */
+	u32 dslp;       /* driver sleep time-out */
+	u8 rsvd[244];
 } __attribute__((packed));
 
 /* OpRegion mailbox #3: ASLE */
 struct opregion_asle {
-       u32 ardy;       /* driver readiness */
-       u32 aslc;       /* ASLE interrupt command */
-       u32 tche;       /* technology enabled indicator */
-       u32 alsi;       /* current ALS illuminance reading */
-       u32 bclp;       /* backlight brightness to set */
-       u32 pfit;       /* panel fitting state */
-       u32 cblv;       /* current brightness level */
-       u16 bclm[20];   /* backlight level duty cycle mapping table */
-       u32 cpfm;       /* current panel fitting mode */
-       u32 epfm;       /* enabled panel fitting modes */
-       u8 plut[74];    /* panel LUT and identifier */
-       u32 pfmb;       /* PWM freq and min brightness */
-       u8 rsvd[102];
+	u32 ardy;       /* driver readiness */
+	u32 aslc;       /* ASLE interrupt command */
+	u32 tche;       /* technology enabled indicator */
+	u32 alsi;       /* current ALS illuminance reading */
+	u32 bclp;       /* backlight brightness to set */
+	u32 pfit;       /* panel fitting state */
+	u32 cblv;       /* current brightness level */
+	u16 bclm[20];   /* backlight level duty cycle mapping table */
+	u32 cpfm;       /* current panel fitting mode */
+	u32 epfm;       /* enabled panel fitting modes */
+	u8 plut[74];    /* panel LUT and identifier */
+	u32 pfmb;       /* PWM freq and min brightness */
+	u8 rsvd[102];
 } __attribute__((packed));
 
 /* ASLE irq request bits */
@@ -361,7 +361,7 @@
 
 	list_for_each_entry(acpi_cdev, &acpi_video_bus->children, node) {
 		if (i >= 8) {
-			dev_printk (KERN_ERR, &dev->pdev->dev,
+			dev_printk(KERN_ERR, &dev->pdev->dev,
 				    "More than 8 outputs detected\n");
 			return;
 		}
@@ -387,7 +387,7 @@
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		int output_type = ACPI_OTHER_OUTPUT;
 		if (i >= 8) {
-			dev_printk (KERN_ERR, &dev->pdev->dev,
+			dev_printk(KERN_ERR, &dev->pdev->dev,
 				    "More than 8 outputs detected\n");
 			return;
 		}
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index d360380..cdf17d4 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -117,57 +117,57 @@
 
 /* memory bufferd overlay registers */
 struct overlay_registers {
-    u32 OBUF_0Y;
-    u32 OBUF_1Y;
-    u32 OBUF_0U;
-    u32 OBUF_0V;
-    u32 OBUF_1U;
-    u32 OBUF_1V;
-    u32 OSTRIDE;
-    u32 YRGB_VPH;
-    u32 UV_VPH;
-    u32 HORZ_PH;
-    u32 INIT_PHS;
-    u32 DWINPOS;
-    u32 DWINSZ;
-    u32 SWIDTH;
-    u32 SWIDTHSW;
-    u32 SHEIGHT;
-    u32 YRGBSCALE;
-    u32 UVSCALE;
-    u32 OCLRC0;
-    u32 OCLRC1;
-    u32 DCLRKV;
-    u32 DCLRKM;
-    u32 SCLRKVH;
-    u32 SCLRKVL;
-    u32 SCLRKEN;
-    u32 OCONFIG;
-    u32 OCMD;
-    u32 RESERVED1; /* 0x6C */
-    u32 OSTART_0Y;
-    u32 OSTART_1Y;
-    u32 OSTART_0U;
-    u32 OSTART_0V;
-    u32 OSTART_1U;
-    u32 OSTART_1V;
-    u32 OTILEOFF_0Y;
-    u32 OTILEOFF_1Y;
-    u32 OTILEOFF_0U;
-    u32 OTILEOFF_0V;
-    u32 OTILEOFF_1U;
-    u32 OTILEOFF_1V;
-    u32 FASTHSCALE; /* 0xA0 */
-    u32 UVSCALEV; /* 0xA4 */
-    u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
-    u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
-    u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
-    u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
-    u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
-    u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
-    u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
-    u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
-    u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
+	u32 OBUF_0Y;
+	u32 OBUF_1Y;
+	u32 OBUF_0U;
+	u32 OBUF_0V;
+	u32 OBUF_1U;
+	u32 OBUF_1V;
+	u32 OSTRIDE;
+	u32 YRGB_VPH;
+	u32 UV_VPH;
+	u32 HORZ_PH;
+	u32 INIT_PHS;
+	u32 DWINPOS;
+	u32 DWINSZ;
+	u32 SWIDTH;
+	u32 SWIDTHSW;
+	u32 SHEIGHT;
+	u32 YRGBSCALE;
+	u32 UVSCALE;
+	u32 OCLRC0;
+	u32 OCLRC1;
+	u32 DCLRKV;
+	u32 DCLRKM;
+	u32 SCLRKVH;
+	u32 SCLRKVL;
+	u32 SCLRKEN;
+	u32 OCONFIG;
+	u32 OCMD;
+	u32 RESERVED1; /* 0x6C */
+	u32 OSTART_0Y;
+	u32 OSTART_1Y;
+	u32 OSTART_0U;
+	u32 OSTART_0V;
+	u32 OSTART_1U;
+	u32 OSTART_1V;
+	u32 OTILEOFF_0Y;
+	u32 OTILEOFF_1Y;
+	u32 OTILEOFF_0U;
+	u32 OTILEOFF_0V;
+	u32 OTILEOFF_1U;
+	u32 OTILEOFF_1V;
+	u32 FASTHSCALE; /* 0xA0 */
+	u32 UVSCALEV; /* 0xA4 */
+	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
+	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
+	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
+	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
+	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
+	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
+	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
+	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
+	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
 };
 
 struct intel_overlay {
@@ -192,7 +192,7 @@
 static struct overlay_registers *
 intel_overlay_map_regs(struct intel_overlay *overlay)
 {
-        drm_i915_private_t *dev_priv = overlay->dev->dev_private;
+	drm_i915_private_t *dev_priv = overlay->dev->dev_private;
 	struct overlay_registers *regs;
 
 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
@@ -264,7 +264,7 @@
 
 	mode = drm_mode_duplicate(dev, &vesa_640x480);
 	drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
-	if(!drm_crtc_helper_set_mode(&crtc->base, mode,
+	if (!drm_crtc_helper_set_mode(&crtc->base, mode,
 				       crtc->base.x, crtc->base.y,
 				       crtc->base.fb))
 		return 0;
@@ -332,7 +332,7 @@
 				  bool load_polyphase_filter)
 {
 	struct drm_device *dev = overlay->dev;
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_request *request;
 	u32 flip_addr = overlay->flip_addr;
 	u32 tmp;
@@ -359,7 +359,7 @@
 	}
 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
 	OUT_RING(flip_addr);
-        ADVANCE_LP_RING();
+	ADVANCE_LP_RING();
 
 	ret = i915_add_request(LP_RING(dev_priv), NULL, request);
 	if (ret) {
@@ -583,7 +583,7 @@
 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
 	if (!IS_GEN2(dev))
 		ret <<= 1;
-	ret -=1;
+	ret -= 1;
 	return ret << 2;
 }
 
@@ -817,7 +817,7 @@
 	regs->SWIDTHSW = calc_swidthsw(overlay->dev,
 				       params->offset_Y, tmp_width);
 	regs->SHEIGHT = params->src_h;
-	regs->OBUF_0Y = new_bo->gtt_offset + params-> offset_Y;
+	regs->OBUF_0Y = new_bo->gtt_offset + params->offset_Y;
 	regs->OSTRIDE = params->stride_Y;
 
 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
@@ -917,7 +917,7 @@
 	 * line with the intel documentation for the i965
 	 */
 	if (INTEL_INFO(dev)->gen >= 4) {
-	       	/* on i965 use the PGM reg to read out the autoscaler values */
+		/* on i965 use the PGM reg to read out the autoscaler values */
 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
 	} else {
 		if (pfit_control & VERT_AUTO_SCALE)
@@ -1098,7 +1098,7 @@
 }
 
 int intel_overlay_put_image(struct drm_device *dev, void *data,
-                            struct drm_file *file_priv)
+			    struct drm_file *file_priv)
 {
 	struct drm_intel_overlay_put_image *put_image_rec = data;
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -1301,10 +1301,10 @@
 }
 
 int intel_overlay_attrs(struct drm_device *dev, void *data,
-                        struct drm_file *file_priv)
+			struct drm_file *file_priv)
 {
 	struct drm_intel_overlay_attrs *attrs = data;
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_overlay *overlay;
 	struct overlay_registers *regs;
 	int ret;
@@ -1393,7 +1393,7 @@
 
 void intel_setup_overlay(struct drm_device *dev)
 {
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_overlay *overlay;
 	struct drm_i915_gem_object *reg_bo;
 	struct overlay_registers *regs;
@@ -1421,24 +1421,24 @@
 		ret = i915_gem_attach_phys_object(dev, reg_bo,
 						  I915_GEM_PHYS_OVERLAY_REGS,
 						  PAGE_SIZE);
-                if (ret) {
-                        DRM_ERROR("failed to attach phys overlay regs\n");
-                        goto out_free_bo;
-                }
+		if (ret) {
+			DRM_ERROR("failed to attach phys overlay regs\n");
+			goto out_free_bo;
+		}
 		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
 	} else {
 		ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true);
 		if (ret) {
-                        DRM_ERROR("failed to pin overlay register bo\n");
-                        goto out_free_bo;
-                }
+			DRM_ERROR("failed to pin overlay register bo\n");
+			goto out_free_bo;
+		}
 		overlay->flip_addr = reg_bo->gtt_offset;
 
 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
 		if (ret) {
-                        DRM_ERROR("failed to move overlay register bo into the GTT\n");
-                        goto out_unpin_bo;
-                }
+			DRM_ERROR("failed to move overlay register bo into the GTT\n");
+			goto out_unpin_bo;
+		}
 	}
 
 	/* init all values */
@@ -1525,7 +1525,7 @@
 struct intel_overlay_error_state *
 intel_overlay_capture_error_state(struct drm_device *dev)
 {
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_overlay *overlay = dev_priv->overlay;
 	struct intel_overlay_error_state *error;
 	struct overlay_registers __iomem *regs;
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index a9e0c7b..03500e9 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -84,7 +84,7 @@
 			if (scaled_width > scaled_height) { /* pillar */
 				width = scaled_height / mode->vdisplay;
 				if (width & 1)
-				    	width++;
+					width++;
 				x = (adjusted_mode->hdisplay - width + 1) / 2;
 				y = 0;
 				height = adjusted_mode->vdisplay;
@@ -206,7 +206,7 @@
 		if (IS_PINEVIEW(dev))
 			val >>= 1;
 
-		if (is_backlight_combination_mode(dev)){
+		if (is_backlight_combination_mode(dev)) {
 			u8 lbpc;
 
 			val &= ~1;
@@ -236,7 +236,7 @@
 	if (HAS_PCH_SPLIT(dev))
 		return intel_pch_panel_set_backlight(dev, level);
 
-	if (is_backlight_combination_mode(dev)){
+	if (is_backlight_combination_mode(dev)) {
 		u32 max = intel_panel_get_max_backlight(dev);
 		u8 lbpc;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c30626e..0e99589 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -315,79 +315,127 @@
 }
 
 static void
-update_semaphore(struct intel_ring_buffer *ring, int i, u32 seqno)
+update_mboxes(struct intel_ring_buffer *ring,
+	    u32 seqno,
+	    u32 mmio_offset)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int id;
-
-	/*
-	 * cs -> 1 = vcs, 0 = bcs
-	 * vcs -> 1 = bcs, 0 = cs,
-	 * bcs -> 1 = cs, 0 = vcs.
-	 */
-	id = ring - dev_priv->ring;
-	id += 2 - i;
-	id %= 3;
-
-	intel_ring_emit(ring,
-			MI_SEMAPHORE_MBOX |
-			MI_SEMAPHORE_REGISTER |
-			MI_SEMAPHORE_UPDATE);
+	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
+			      MI_SEMAPHORE_GLOBAL_GTT |
+			      MI_SEMAPHORE_REGISTER |
+			      MI_SEMAPHORE_UPDATE);
 	intel_ring_emit(ring, seqno);
-	intel_ring_emit(ring,
-			RING_SYNC_0(dev_priv->ring[id].mmio_base) + 4*i);
+	intel_ring_emit(ring, mmio_offset);
 }
 
+/**
+ * gen6_add_request - Update the semaphore mailbox registers
+ * 
+ * @ring - ring that is adding a request
+ * @seqno - return seqno stuck into the ring
+ *
+ * Update the mailbox registers in the *other* rings with the current seqno.
+ * This acts like a signal in the canonical semaphore.
+ */
 static int
 gen6_add_request(struct intel_ring_buffer *ring,
-		 u32 *result)
+		 u32 *seqno)
 {
-	u32 seqno;
+	u32 mbox1_reg;
+	u32 mbox2_reg;
 	int ret;
 
 	ret = intel_ring_begin(ring, 10);
 	if (ret)
 		return ret;
 
-	seqno = i915_gem_get_seqno(ring->dev);
-	update_semaphore(ring, 0, seqno);
-	update_semaphore(ring, 1, seqno);
+	mbox1_reg = ring->signal_mbox[0];
+	mbox2_reg = ring->signal_mbox[1];
 
+	*seqno = i915_gem_get_seqno(ring->dev);
+
+	update_mboxes(ring, *seqno, mbox1_reg);
+	update_mboxes(ring, *seqno, mbox2_reg);
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, *seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
-	*result = seqno;
 	return 0;
 }
 
-int
-intel_ring_sync(struct intel_ring_buffer *ring,
-		struct intel_ring_buffer *to,
+/**
+ * intel_ring_sync - sync the waiter to the signaller on seqno
+ *
+ * @waiter - ring that is waiting
+ * @signaller - ring which has, or will signal
+ * @seqno - seqno which the waiter will block on
+ */
+static int
+intel_ring_sync(struct intel_ring_buffer *waiter,
+		struct intel_ring_buffer *signaller,
+		int ring,
 		u32 seqno)
 {
 	int ret;
+	u32 dw1 = MI_SEMAPHORE_MBOX |
+		  MI_SEMAPHORE_COMPARE |
+		  MI_SEMAPHORE_REGISTER;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(waiter, 4);
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring,
-			MI_SEMAPHORE_MBOX |
-			MI_SEMAPHORE_REGISTER |
-			intel_ring_sync_index(ring, to) << 17 |
-			MI_SEMAPHORE_COMPARE);
-	intel_ring_emit(ring, seqno);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
+	intel_ring_emit(waiter, seqno);
+	intel_ring_emit(waiter, 0);
+	intel_ring_emit(waiter, MI_NOOP);
+	intel_ring_advance(waiter);
 
 	return 0;
 }
 
+/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
+int
+render_ring_sync_to(struct intel_ring_buffer *waiter,
+		    struct intel_ring_buffer *signaller,
+		    u32 seqno)
+{
+	WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID);
+	return intel_ring_sync(waiter,
+			       signaller,
+			       RCS,
+			       seqno);
+}
+
+/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
+int
+gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
+		      struct intel_ring_buffer *signaller,
+		      u32 seqno)
+{
+	WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID);
+	return intel_ring_sync(waiter,
+			       signaller,
+			       VCS,
+			       seqno);
+}
+
+/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
+int
+gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
+		      struct intel_ring_buffer *signaller,
+		      u32 seqno)
+{
+	WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID);
+	return intel_ring_sync(waiter,
+			       signaller,
+			       BCS,
+			       seqno);
+}
+
+
+
 #define PIPE_CONTROL_FLUSH(ring__, addr__)					\
 do {									\
 	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |		\
@@ -1026,7 +1074,12 @@
 	.irq_get		= render_ring_get_irq,
 	.irq_put		= render_ring_put_irq,
 	.dispatch_execbuffer	= render_ring_dispatch_execbuffer,
-       .cleanup			= render_ring_cleanup,
+	.cleanup		= render_ring_cleanup,
+	.sync_to		= render_ring_sync_to,
+	.semaphore_register	= {MI_SEMAPHORE_SYNC_INVALID,
+				   MI_SEMAPHORE_SYNC_RV,
+				   MI_SEMAPHORE_SYNC_RB},
+	.signal_mbox		= {GEN6_VRSYNC, GEN6_BRSYNC},
 };
 
 /* ring buffer for bit-stream decoder */
@@ -1050,23 +1103,23 @@
 static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
 				     u32 value)
 {
-       drm_i915_private_t *dev_priv = ring->dev->dev_private;
+	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 
        /* Every tail move must follow the sequence below */
-       I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
-	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
-	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
-       I915_WRITE(GEN6_BSD_RNCID, 0x0);
+	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
+		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
+		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
+	I915_WRITE(GEN6_BSD_RNCID, 0x0);
 
-       if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
-                               GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0,
-                       50))
-               DRM_ERROR("timed out waiting for IDLE Indicator\n");
+	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
+		GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0,
+		50))
+	DRM_ERROR("timed out waiting for IDLE Indicator\n");
 
-       I915_WRITE_TAIL(ring, value);
-       I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
-	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
-	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
+	I915_WRITE_TAIL(ring, value);
+	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
+		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
+		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
 }
 
 static int gen6_ring_flush(struct intel_ring_buffer *ring,
@@ -1094,18 +1147,18 @@
 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
 			      u32 offset, u32 len)
 {
-       int ret;
+	int ret;
 
-       ret = intel_ring_begin(ring, 2);
-       if (ret)
-	       return ret;
+	ret = intel_ring_begin(ring, 2);
+	if (ret)
+		return ret;
 
-       intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
-       /* bit0-7 is the length on GEN6+ */
-       intel_ring_emit(ring, offset);
-       intel_ring_advance(ring);
+	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+	/* bit0-7 is the length on GEN6+ */
+	intel_ring_emit(ring, offset);
+	intel_ring_advance(ring);
 
-       return 0;
+	return 0;
 }
 
 static bool
@@ -1154,6 +1207,11 @@
 	.irq_get		= gen6_bsd_ring_get_irq,
 	.irq_put		= gen6_bsd_ring_put_irq,
 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
+	.sync_to		= gen6_bsd_ring_sync_to,
+	.semaphore_register	= {MI_SEMAPHORE_SYNC_VR,
+				   MI_SEMAPHORE_SYNC_INVALID,
+				   MI_SEMAPHORE_SYNC_VB},
+	.signal_mbox		= {GEN6_RVSYNC, GEN6_BVSYNC},
 };
 
 /* Blitter support (SandyBridge+) */
@@ -1272,19 +1330,24 @@
 }
 
 static const struct intel_ring_buffer gen6_blt_ring = {
-       .name			= "blt ring",
-       .id			= RING_BLT,
-       .mmio_base		= BLT_RING_BASE,
-       .size			= 32 * PAGE_SIZE,
-       .init			= blt_ring_init,
-       .write_tail		= ring_write_tail,
-       .flush			= blt_ring_flush,
-       .add_request		= gen6_add_request,
-       .get_seqno		= ring_get_seqno,
-       .irq_get			= blt_ring_get_irq,
-       .irq_put			= blt_ring_put_irq,
-       .dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
-       .cleanup			= blt_ring_cleanup,
+	.name			= "blt ring",
+	.id			= RING_BLT,
+	.mmio_base		= BLT_RING_BASE,
+	.size			= 32 * PAGE_SIZE,
+	.init			= blt_ring_init,
+	.write_tail		= ring_write_tail,
+	.flush			= blt_ring_flush,
+	.add_request		= gen6_add_request,
+	.get_seqno		= ring_get_seqno,
+	.irq_get		= blt_ring_get_irq,
+	.irq_put		= blt_ring_put_irq,
+	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
+	.cleanup		= blt_ring_cleanup,
+	.sync_to		= gen6_blt_ring_sync_to,
+	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
+				   MI_SEMAPHORE_SYNC_BV,
+				   MI_SEMAPHORE_SYNC_INVALID},
+	.signal_mbox		= {GEN6_RBSYNC, GEN6_VBSYNC},
 };
 
 int intel_init_render_ring_buffer(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 39ac2b6..68281c9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -2,10 +2,10 @@
 #define _INTEL_RINGBUFFER_H_
 
 enum {
-    RCS = 0x0,
-    VCS,
-    BCS,
-    I915_NUM_RINGS,
+	RCS = 0x0,
+	VCS,
+	BCS,
+	I915_NUM_RINGS,
 };
 
 struct  intel_hw_status_page {
@@ -75,7 +75,12 @@
 	int		(*dispatch_execbuffer)(struct intel_ring_buffer *ring,
 					       u32 offset, u32 length);
 	void		(*cleanup)(struct intel_ring_buffer *ring);
+	int		(*sync_to)(struct intel_ring_buffer *ring,
+				   struct intel_ring_buffer *to,
+				   u32 seqno);
 
+	u32		semaphore_register[3]; /*our mbox written by others */
+	u32		signal_mbox[2]; /* mboxes this ring signals to */
 	/**
 	 * List of objects currently involved in rendering from the
 	 * ringbuffer.
@@ -180,9 +185,6 @@
 void intel_ring_advance(struct intel_ring_buffer *ring);
 
 u32 intel_ring_get_seqno(struct intel_ring_buffer *ring);
-int intel_ring_sync(struct intel_ring_buffer *ring,
-		    struct intel_ring_buffer *to,
-		    u32 seqno);
 
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 6348c49..7312002 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -43,7 +43,7 @@
 #define SDVO_TV_MASK   (SDVO_OUTPUT_CVBS0 | SDVO_OUTPUT_SVID0)
 
 #define SDVO_OUTPUT_MASK (SDVO_TMDS_MASK | SDVO_RGB_MASK | SDVO_LVDS_MASK |\
-                         SDVO_TV_MASK)
+			SDVO_TV_MASK)
 
 #define IS_TV(c)	(c->output_flag & SDVO_TV_MASK)
 #define IS_TMDS(c)	(c->output_flag & SDVO_TMDS_MASK)
@@ -288,117 +288,117 @@
 	u8 cmd;
 	const char *name;
 } sdvo_cmd_names[] = {
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_RESET),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DEVICE_CAPS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FIRMWARE_REV),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TRAINED_INPUTS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_OUTPUTS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_OUTPUTS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_IN_OUT_MAP),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_IN_OUT_MAP),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ATTACHED_DISPLAYS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HOT_PLUG_SUPPORT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_HOT_PLUG),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_HOT_PLUG),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_INPUT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_OUTPUT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_CREATE_PREFERRED_INPUT_TIMING),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_PIXEL_CLOCK_RANGE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_CLOCK_RATE_MULTS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CLOCK_RATE_MULT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CLOCK_RATE_MULT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_TV_FORMATS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_FORMAT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_FORMAT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_POWER_STATES),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POWER_STATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODER_POWER_STATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DISPLAY_POWER_STATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTROL_BUS_SWITCH),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_RESET),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DEVICE_CAPS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FIRMWARE_REV),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TRAINED_INPUTS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_OUTPUTS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_OUTPUTS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_IN_OUT_MAP),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_IN_OUT_MAP),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ATTACHED_DISPLAYS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HOT_PLUG_SUPPORT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_HOT_PLUG),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_HOT_PLUG),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_INPUT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_OUTPUT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_CREATE_PREFERRED_INPUT_TIMING),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_PIXEL_CLOCK_RANGE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_CLOCK_RATE_MULTS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CLOCK_RATE_MULT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CLOCK_RATE_MULT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_TV_FORMATS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_FORMAT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_FORMAT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_POWER_STATES),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POWER_STATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODER_POWER_STATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DISPLAY_POWER_STATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTROL_BUS_SWITCH),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS),
 
-    /* Add the op code for SDVO enhancements */
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_VPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_VPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_VPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_ADAPTIVE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_ADAPTIVE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_ADAPTIVE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_2D),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_2D),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_2D),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SHARPNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SHARPNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SHARPNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DOT_CRAWL),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DOT_CRAWL),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_CHROMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_CHROMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_CHROMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_LUMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_LUMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_LUMA_FILTER),
+	/* Add the op code for SDVO enhancements */
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_VPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_VPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_VPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_ADAPTIVE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_ADAPTIVE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_ADAPTIVE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_2D),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_2D),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_2D),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SHARPNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SHARPNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SHARPNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DOT_CRAWL),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DOT_CRAWL),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_CHROMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_CHROMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_CHROMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_LUMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_LUMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_LUMA_FILTER),
 
-    /* HDMI op code */
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_PIXEL_REPLI),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PIXEL_REPLI),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY_CAP),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_COLORIMETRY),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_AUDIO_STAT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_STAT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INDEX),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_INDEX),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INFO),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_AV_SPLIT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_AV_SPLIT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_TXRATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_TXRATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_DATA),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_DATA),
+	/* HDMI op code */
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_PIXEL_REPLI),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PIXEL_REPLI),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY_CAP),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_COLORIMETRY),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_AUDIO_STAT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_STAT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INDEX),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_INDEX),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INFO),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_AV_SPLIT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_AV_SPLIT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_TXRATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_TXRATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_DATA),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_DATA),
 };
 
 #define IS_SDVOB(reg)	(reg == SDVOB || reg == PCH_SDVOB)
@@ -2275,7 +2275,7 @@
 		DRM_DEBUG_KMS(#name ": max %d, default %d, current %d\n", \
 			      data_value[0], data_value[1], response); \
 	} \
-} while(0)
+} while (0)
 
 static bool
 intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo,
@@ -2442,7 +2442,7 @@
 
 	if (IS_TV(intel_sdvo_connector))
 		return intel_sdvo_create_enhance_property_tv(intel_sdvo, intel_sdvo_connector, enhancements.reply);
-	else if(IS_LVDS(intel_sdvo_connector))
+	else if (IS_LVDS(intel_sdvo_connector))
 		return intel_sdvo_create_enhance_property_lvds(intel_sdvo, intel_sdvo_connector, enhancements.reply);
 	else
 		return true;
diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h
index 4f4e23b..4aa6f34 100644
--- a/drivers/gpu/drm/i915/intel_sdvo_regs.h
+++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h
@@ -46,63 +46,63 @@
 #define SDVO_OUTPUT_LAST    (14)
 
 struct intel_sdvo_caps {
-    u8 vendor_id;
-    u8 device_id;
-    u8 device_rev_id;
-    u8 sdvo_version_major;
-    u8 sdvo_version_minor;
-    unsigned int sdvo_inputs_mask:2;
-    unsigned int smooth_scaling:1;
-    unsigned int sharp_scaling:1;
-    unsigned int up_scaling:1;
-    unsigned int down_scaling:1;
-    unsigned int stall_support:1;
-    unsigned int pad:1;
-    u16 output_flags;
+	u8 vendor_id;
+	u8 device_id;
+	u8 device_rev_id;
+	u8 sdvo_version_major;
+	u8 sdvo_version_minor;
+	unsigned int sdvo_inputs_mask:2;
+	unsigned int smooth_scaling:1;
+	unsigned int sharp_scaling:1;
+	unsigned int up_scaling:1;
+	unsigned int down_scaling:1;
+	unsigned int stall_support:1;
+	unsigned int pad:1;
+	u16 output_flags;
 } __attribute__((packed));
 
 /** This matches the EDID DTD structure, more or less */
 struct intel_sdvo_dtd {
-    struct {
-	u16 clock;		/**< pixel clock, in 10kHz units */
-	u8 h_active;		/**< lower 8 bits (pixels) */
-	u8 h_blank;		/**< lower 8 bits (pixels) */
-	u8 h_high;		/**< upper 4 bits each h_active, h_blank */
-	u8 v_active;		/**< lower 8 bits (lines) */
-	u8 v_blank;		/**< lower 8 bits (lines) */
-	u8 v_high;		/**< upper 4 bits each v_active, v_blank */
-    } part1;
+	struct {
+		u16 clock;	/**< pixel clock, in 10kHz units */
+		u8 h_active;	/**< lower 8 bits (pixels) */
+		u8 h_blank;	/**< lower 8 bits (pixels) */
+		u8 h_high;	/**< upper 4 bits each h_active, h_blank */
+		u8 v_active;	/**< lower 8 bits (lines) */
+		u8 v_blank;	/**< lower 8 bits (lines) */
+		u8 v_high;	/**< upper 4 bits each v_active, v_blank */
+	} part1;
 
-    struct {
-	u8 h_sync_off;	/**< lower 8 bits, from hblank start */
-	u8 h_sync_width;	/**< lower 8 bits (pixels) */
-	/** lower 4 bits each vsync offset, vsync width */
-	u8 v_sync_off_width;
-	/**
-	 * 2 high bits of hsync offset, 2 high bits of hsync width,
-	 * bits 4-5 of vsync offset, and 2 high bits of vsync width.
-	 */
-	u8 sync_off_width_high;
-	u8 dtd_flags;
-	u8 sdvo_flags;
-	/** bits 6-7 of vsync offset at bits 6-7 */
-	u8 v_sync_off_high;
-	u8 reserved;
-    } part2;
+	struct {
+		u8 h_sync_off;	/**< lower 8 bits, from hblank start */
+		u8 h_sync_width;	/**< lower 8 bits (pixels) */
+		/** lower 4 bits each vsync offset, vsync width */
+		u8 v_sync_off_width;
+		/**
+		* 2 high bits of hsync offset, 2 high bits of hsync width,
+		* bits 4-5 of vsync offset, and 2 high bits of vsync width.
+		*/
+		u8 sync_off_width_high;
+		u8 dtd_flags;
+		u8 sdvo_flags;
+		/** bits 6-7 of vsync offset at bits 6-7 */
+		u8 v_sync_off_high;
+		u8 reserved;
+	} part2;
 } __attribute__((packed));
 
 struct intel_sdvo_pixel_clock_range {
-    u16 min;			/**< pixel clock, in 10kHz units */
-    u16 max;			/**< pixel clock, in 10kHz units */
+	u16 min;	/**< pixel clock, in 10kHz units */
+	u16 max;	/**< pixel clock, in 10kHz units */
 } __attribute__((packed));
 
 struct intel_sdvo_preferred_input_timing_args {
-    u16 clock;
-    u16 width;
-    u16 height;
-    u8	interlace:1;
-    u8	scaled:1;
-    u8	pad:6;
+	u16 clock;
+	u16 width;
+	u16 height;
+	u8	interlace:1;
+	u8	scaled:1;
+	u8	pad:6;
 } __attribute__((packed));
 
 /* I2C registers for SDVO */
@@ -154,9 +154,9 @@
  */
 #define SDVO_CMD_GET_TRAINED_INPUTS			0x03
 struct intel_sdvo_get_trained_inputs_response {
-    unsigned int input0_trained:1;
-    unsigned int input1_trained:1;
-    unsigned int pad:6;
+	unsigned int input0_trained:1;
+	unsigned int input1_trained:1;
+	unsigned int pad:6;
 } __attribute__((packed));
 
 /** Returns a struct intel_sdvo_output_flags of active outputs. */
@@ -177,7 +177,7 @@
  */
 #define SDVO_CMD_GET_IN_OUT_MAP				0x06
 struct intel_sdvo_in_out_map {
-    u16 in0, in1;
+	u16 in0, in1;
 };
 
 /**
@@ -210,10 +210,10 @@
 
 #define SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE		0x0f
 struct intel_sdvo_get_interrupt_event_source_response {
-    u16 interrupt_status;
-    unsigned int ambient_light_interrupt:1;
-    unsigned int hdmi_audio_encrypt_change:1;
-    unsigned int pad:6;
+	u16 interrupt_status;
+	unsigned int ambient_light_interrupt:1;
+	unsigned int hdmi_audio_encrypt_change:1;
+	unsigned int pad:6;
 } __attribute__((packed));
 
 /**
@@ -225,8 +225,8 @@
  */
 #define SDVO_CMD_SET_TARGET_INPUT			0x10
 struct intel_sdvo_set_target_input_args {
-    unsigned int target_1:1;
-    unsigned int pad:7;
+	unsigned int target_1:1;
+	unsigned int pad:7;
 } __attribute__((packed));
 
 /**
@@ -314,57 +314,57 @@
 #define SDVO_CMD_GET_SUPPORTED_TV_FORMATS		0x27
 /** 6 bytes of bit flags for TV formats shared by all TV format functions */
 struct intel_sdvo_tv_format {
-    unsigned int ntsc_m:1;
-    unsigned int ntsc_j:1;
-    unsigned int ntsc_443:1;
-    unsigned int pal_b:1;
-    unsigned int pal_d:1;
-    unsigned int pal_g:1;
-    unsigned int pal_h:1;
-    unsigned int pal_i:1;
+	unsigned int ntsc_m:1;
+	unsigned int ntsc_j:1;
+	unsigned int ntsc_443:1;
+	unsigned int pal_b:1;
+	unsigned int pal_d:1;
+	unsigned int pal_g:1;
+	unsigned int pal_h:1;
+	unsigned int pal_i:1;
 
-    unsigned int pal_m:1;
-    unsigned int pal_n:1;
-    unsigned int pal_nc:1;
-    unsigned int pal_60:1;
-    unsigned int secam_b:1;
-    unsigned int secam_d:1;
-    unsigned int secam_g:1;
-    unsigned int secam_k:1;
+	unsigned int pal_m:1;
+	unsigned int pal_n:1;
+	unsigned int pal_nc:1;
+	unsigned int pal_60:1;
+	unsigned int secam_b:1;
+	unsigned int secam_d:1;
+	unsigned int secam_g:1;
+	unsigned int secam_k:1;
 
-    unsigned int secam_k1:1;
-    unsigned int secam_l:1;
-    unsigned int secam_60:1;
-    unsigned int hdtv_std_smpte_240m_1080i_59:1;
-    unsigned int hdtv_std_smpte_240m_1080i_60:1;
-    unsigned int hdtv_std_smpte_260m_1080i_59:1;
-    unsigned int hdtv_std_smpte_260m_1080i_60:1;
-    unsigned int hdtv_std_smpte_274m_1080i_50:1;
+	unsigned int secam_k1:1;
+	unsigned int secam_l:1;
+	unsigned int secam_60:1;
+	unsigned int hdtv_std_smpte_240m_1080i_59:1;
+	unsigned int hdtv_std_smpte_240m_1080i_60:1;
+	unsigned int hdtv_std_smpte_260m_1080i_59:1;
+	unsigned int hdtv_std_smpte_260m_1080i_60:1;
+	unsigned int hdtv_std_smpte_274m_1080i_50:1;
 
-    unsigned int hdtv_std_smpte_274m_1080i_59:1;
-    unsigned int hdtv_std_smpte_274m_1080i_60:1;
-    unsigned int hdtv_std_smpte_274m_1080p_23:1;
-    unsigned int hdtv_std_smpte_274m_1080p_24:1;
-    unsigned int hdtv_std_smpte_274m_1080p_25:1;
-    unsigned int hdtv_std_smpte_274m_1080p_29:1;
-    unsigned int hdtv_std_smpte_274m_1080p_30:1;
-    unsigned int hdtv_std_smpte_274m_1080p_50:1;
+	unsigned int hdtv_std_smpte_274m_1080i_59:1;
+	unsigned int hdtv_std_smpte_274m_1080i_60:1;
+	unsigned int hdtv_std_smpte_274m_1080p_23:1;
+	unsigned int hdtv_std_smpte_274m_1080p_24:1;
+	unsigned int hdtv_std_smpte_274m_1080p_25:1;
+	unsigned int hdtv_std_smpte_274m_1080p_29:1;
+	unsigned int hdtv_std_smpte_274m_1080p_30:1;
+	unsigned int hdtv_std_smpte_274m_1080p_50:1;
 
-    unsigned int hdtv_std_smpte_274m_1080p_59:1;
-    unsigned int hdtv_std_smpte_274m_1080p_60:1;
-    unsigned int hdtv_std_smpte_295m_1080i_50:1;
-    unsigned int hdtv_std_smpte_295m_1080p_50:1;
-    unsigned int hdtv_std_smpte_296m_720p_59:1;
-    unsigned int hdtv_std_smpte_296m_720p_60:1;
-    unsigned int hdtv_std_smpte_296m_720p_50:1;
-    unsigned int hdtv_std_smpte_293m_480p_59:1;
+	unsigned int hdtv_std_smpte_274m_1080p_59:1;
+	unsigned int hdtv_std_smpte_274m_1080p_60:1;
+	unsigned int hdtv_std_smpte_295m_1080i_50:1;
+	unsigned int hdtv_std_smpte_295m_1080p_50:1;
+	unsigned int hdtv_std_smpte_296m_720p_59:1;
+	unsigned int hdtv_std_smpte_296m_720p_60:1;
+	unsigned int hdtv_std_smpte_296m_720p_50:1;
+	unsigned int hdtv_std_smpte_293m_480p_59:1;
 
-    unsigned int hdtv_std_smpte_170m_480i_59:1;
-    unsigned int hdtv_std_iturbt601_576i_50:1;
-    unsigned int hdtv_std_iturbt601_576p_50:1;
-    unsigned int hdtv_std_eia_7702a_480i_60:1;
-    unsigned int hdtv_std_eia_7702a_480p_60:1;
-    unsigned int pad:3;
+	unsigned int hdtv_std_smpte_170m_480i_59:1;
+	unsigned int hdtv_std_iturbt601_576i_50:1;
+	unsigned int hdtv_std_iturbt601_576p_50:1;
+	unsigned int hdtv_std_eia_7702a_480i_60:1;
+	unsigned int hdtv_std_eia_7702a_480p_60:1;
+	unsigned int pad:3;
 } __attribute__((packed));
 
 #define SDVO_CMD_GET_TV_FORMAT				0x28
@@ -374,53 +374,53 @@
 /** Returns the resolutiosn that can be used with the given TV format */
 #define SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT		0x83
 struct intel_sdvo_sdtv_resolution_request {
-    unsigned int ntsc_m:1;
-    unsigned int ntsc_j:1;
-    unsigned int ntsc_443:1;
-    unsigned int pal_b:1;
-    unsigned int pal_d:1;
-    unsigned int pal_g:1;
-    unsigned int pal_h:1;
-    unsigned int pal_i:1;
+	unsigned int ntsc_m:1;
+	unsigned int ntsc_j:1;
+	unsigned int ntsc_443:1;
+	unsigned int pal_b:1;
+	unsigned int pal_d:1;
+	unsigned int pal_g:1;
+	unsigned int pal_h:1;
+	unsigned int pal_i:1;
 
-    unsigned int pal_m:1;
-    unsigned int pal_n:1;
-    unsigned int pal_nc:1;
-    unsigned int pal_60:1;
-    unsigned int secam_b:1;
-    unsigned int secam_d:1;
-    unsigned int secam_g:1;
-    unsigned int secam_k:1;
+	unsigned int pal_m:1;
+	unsigned int pal_n:1;
+	unsigned int pal_nc:1;
+	unsigned int pal_60:1;
+	unsigned int secam_b:1;
+	unsigned int secam_d:1;
+	unsigned int secam_g:1;
+	unsigned int secam_k:1;
 
-    unsigned int secam_k1:1;
-    unsigned int secam_l:1;
-    unsigned int secam_60:1;
-    unsigned int pad:5;
+	unsigned int secam_k1:1;
+	unsigned int secam_l:1;
+	unsigned int secam_60:1;
+	unsigned int pad:5;
 } __attribute__((packed));
 
 struct intel_sdvo_sdtv_resolution_reply {
-    unsigned int res_320x200:1;
-    unsigned int res_320x240:1;
-    unsigned int res_400x300:1;
-    unsigned int res_640x350:1;
-    unsigned int res_640x400:1;
-    unsigned int res_640x480:1;
-    unsigned int res_704x480:1;
-    unsigned int res_704x576:1;
+	unsigned int res_320x200:1;
+	unsigned int res_320x240:1;
+	unsigned int res_400x300:1;
+	unsigned int res_640x350:1;
+	unsigned int res_640x400:1;
+	unsigned int res_640x480:1;
+	unsigned int res_704x480:1;
+	unsigned int res_704x576:1;
 
-    unsigned int res_720x350:1;
-    unsigned int res_720x400:1;
-    unsigned int res_720x480:1;
-    unsigned int res_720x540:1;
-    unsigned int res_720x576:1;
-    unsigned int res_768x576:1;
-    unsigned int res_800x600:1;
-    unsigned int res_832x624:1;
+	unsigned int res_720x350:1;
+	unsigned int res_720x400:1;
+	unsigned int res_720x480:1;
+	unsigned int res_720x540:1;
+	unsigned int res_720x576:1;
+	unsigned int res_768x576:1;
+	unsigned int res_800x600:1;
+	unsigned int res_832x624:1;
 
-    unsigned int res_920x766:1;
-    unsigned int res_1024x768:1;
-    unsigned int res_1280x1024:1;
-    unsigned int pad:5;
+	unsigned int res_920x766:1;
+	unsigned int res_1024x768:1;
+	unsigned int res_1280x1024:1;
+	unsigned int pad:5;
 } __attribute__((packed));
 
 /* Get supported resolution with squire pixel aspect ratio that can be
@@ -428,90 +428,90 @@
 #define SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT		0x85
 
 struct intel_sdvo_hdtv_resolution_request {
-    unsigned int hdtv_std_smpte_240m_1080i_59:1;
-    unsigned int hdtv_std_smpte_240m_1080i_60:1;
-    unsigned int hdtv_std_smpte_260m_1080i_59:1;
-    unsigned int hdtv_std_smpte_260m_1080i_60:1;
-    unsigned int hdtv_std_smpte_274m_1080i_50:1;
-    unsigned int hdtv_std_smpte_274m_1080i_59:1;
-    unsigned int hdtv_std_smpte_274m_1080i_60:1;
-    unsigned int hdtv_std_smpte_274m_1080p_23:1;
+	unsigned int hdtv_std_smpte_240m_1080i_59:1;
+	unsigned int hdtv_std_smpte_240m_1080i_60:1;
+	unsigned int hdtv_std_smpte_260m_1080i_59:1;
+	unsigned int hdtv_std_smpte_260m_1080i_60:1;
+	unsigned int hdtv_std_smpte_274m_1080i_50:1;
+	unsigned int hdtv_std_smpte_274m_1080i_59:1;
+	unsigned int hdtv_std_smpte_274m_1080i_60:1;
+	unsigned int hdtv_std_smpte_274m_1080p_23:1;
 
-    unsigned int hdtv_std_smpte_274m_1080p_24:1;
-    unsigned int hdtv_std_smpte_274m_1080p_25:1;
-    unsigned int hdtv_std_smpte_274m_1080p_29:1;
-    unsigned int hdtv_std_smpte_274m_1080p_30:1;
-    unsigned int hdtv_std_smpte_274m_1080p_50:1;
-    unsigned int hdtv_std_smpte_274m_1080p_59:1;
-    unsigned int hdtv_std_smpte_274m_1080p_60:1;
-    unsigned int hdtv_std_smpte_295m_1080i_50:1;
+	unsigned int hdtv_std_smpte_274m_1080p_24:1;
+	unsigned int hdtv_std_smpte_274m_1080p_25:1;
+	unsigned int hdtv_std_smpte_274m_1080p_29:1;
+	unsigned int hdtv_std_smpte_274m_1080p_30:1;
+	unsigned int hdtv_std_smpte_274m_1080p_50:1;
+	unsigned int hdtv_std_smpte_274m_1080p_59:1;
+	unsigned int hdtv_std_smpte_274m_1080p_60:1;
+	unsigned int hdtv_std_smpte_295m_1080i_50:1;
 
-    unsigned int hdtv_std_smpte_295m_1080p_50:1;
-    unsigned int hdtv_std_smpte_296m_720p_59:1;
-    unsigned int hdtv_std_smpte_296m_720p_60:1;
-    unsigned int hdtv_std_smpte_296m_720p_50:1;
-    unsigned int hdtv_std_smpte_293m_480p_59:1;
-    unsigned int hdtv_std_smpte_170m_480i_59:1;
-    unsigned int hdtv_std_iturbt601_576i_50:1;
-    unsigned int hdtv_std_iturbt601_576p_50:1;
+	unsigned int hdtv_std_smpte_295m_1080p_50:1;
+	unsigned int hdtv_std_smpte_296m_720p_59:1;
+	unsigned int hdtv_std_smpte_296m_720p_60:1;
+	unsigned int hdtv_std_smpte_296m_720p_50:1;
+	unsigned int hdtv_std_smpte_293m_480p_59:1;
+	unsigned int hdtv_std_smpte_170m_480i_59:1;
+	unsigned int hdtv_std_iturbt601_576i_50:1;
+	unsigned int hdtv_std_iturbt601_576p_50:1;
 
-    unsigned int hdtv_std_eia_7702a_480i_60:1;
-    unsigned int hdtv_std_eia_7702a_480p_60:1;
-    unsigned int pad:6;
+	unsigned int hdtv_std_eia_7702a_480i_60:1;
+	unsigned int hdtv_std_eia_7702a_480p_60:1;
+	unsigned int pad:6;
 } __attribute__((packed));
 
 struct intel_sdvo_hdtv_resolution_reply {
-    unsigned int res_640x480:1;
-    unsigned int res_800x600:1;
-    unsigned int res_1024x768:1;
-    unsigned int res_1280x960:1;
-    unsigned int res_1400x1050:1;
-    unsigned int res_1600x1200:1;
-    unsigned int res_1920x1440:1;
-    unsigned int res_2048x1536:1;
+	unsigned int res_640x480:1;
+	unsigned int res_800x600:1;
+	unsigned int res_1024x768:1;
+	unsigned int res_1280x960:1;
+	unsigned int res_1400x1050:1;
+	unsigned int res_1600x1200:1;
+	unsigned int res_1920x1440:1;
+	unsigned int res_2048x1536:1;
 
-    unsigned int res_2560x1920:1;
-    unsigned int res_3200x2400:1;
-    unsigned int res_3840x2880:1;
-    unsigned int pad1:5;
+	unsigned int res_2560x1920:1;
+	unsigned int res_3200x2400:1;
+	unsigned int res_3840x2880:1;
+	unsigned int pad1:5;
 
-    unsigned int res_848x480:1;
-    unsigned int res_1064x600:1;
-    unsigned int res_1280x720:1;
-    unsigned int res_1360x768:1;
-    unsigned int res_1704x960:1;
-    unsigned int res_1864x1050:1;
-    unsigned int res_1920x1080:1;
-    unsigned int res_2128x1200:1;
+	unsigned int res_848x480:1;
+	unsigned int res_1064x600:1;
+	unsigned int res_1280x720:1;
+	unsigned int res_1360x768:1;
+	unsigned int res_1704x960:1;
+	unsigned int res_1864x1050:1;
+	unsigned int res_1920x1080:1;
+	unsigned int res_2128x1200:1;
 
-    unsigned int res_2560x1400:1;
-    unsigned int res_2728x1536:1;
-    unsigned int res_3408x1920:1;
-    unsigned int res_4264x2400:1;
-    unsigned int res_5120x2880:1;
-    unsigned int pad2:3;
+	unsigned int res_2560x1400:1;
+	unsigned int res_2728x1536:1;
+	unsigned int res_3408x1920:1;
+	unsigned int res_4264x2400:1;
+	unsigned int res_5120x2880:1;
+	unsigned int pad2:3;
 
-    unsigned int res_768x480:1;
-    unsigned int res_960x600:1;
-    unsigned int res_1152x720:1;
-    unsigned int res_1124x768:1;
-    unsigned int res_1536x960:1;
-    unsigned int res_1680x1050:1;
-    unsigned int res_1728x1080:1;
-    unsigned int res_1920x1200:1;
+	unsigned int res_768x480:1;
+	unsigned int res_960x600:1;
+	unsigned int res_1152x720:1;
+	unsigned int res_1124x768:1;
+	unsigned int res_1536x960:1;
+	unsigned int res_1680x1050:1;
+	unsigned int res_1728x1080:1;
+	unsigned int res_1920x1200:1;
 
-    unsigned int res_2304x1440:1;
-    unsigned int res_2456x1536:1;
-    unsigned int res_3072x1920:1;
-    unsigned int res_3840x2400:1;
-    unsigned int res_4608x2880:1;
-    unsigned int pad3:3;
+	unsigned int res_2304x1440:1;
+	unsigned int res_2456x1536:1;
+	unsigned int res_3072x1920:1;
+	unsigned int res_3840x2400:1;
+	unsigned int res_4608x2880:1;
+	unsigned int pad3:3;
 
-    unsigned int res_1280x1024:1;
-    unsigned int pad4:7;
+	unsigned int res_1280x1024:1;
+	unsigned int pad4:7;
 
-    unsigned int res_1280x768:1;
-    unsigned int pad5:7;
+	unsigned int res_1280x768:1;
+	unsigned int pad5:7;
 } __attribute__((packed));
 
 /* Get supported power state returns info for encoder and monitor, rely on
@@ -539,25 +539,25 @@
  * The high fields are bits 8:9 of the 10-bit values.
  */
 struct sdvo_panel_power_sequencing {
-    u8 t0;
-    u8 t1;
-    u8 t2;
-    u8 t3;
-    u8 t4;
+	u8 t0;
+	u8 t1;
+	u8 t2;
+	u8 t3;
+	u8 t4;
 
-    unsigned int t0_high:2;
-    unsigned int t1_high:2;
-    unsigned int t2_high:2;
-    unsigned int t3_high:2;
+	unsigned int t0_high:2;
+	unsigned int t1_high:2;
+	unsigned int t2_high:2;
+	unsigned int t3_high:2;
 
-    unsigned int t4_high:2;
-    unsigned int pad:6;
+	unsigned int t4_high:2;
+	unsigned int pad:6;
 } __attribute__((packed));
 
 #define SDVO_CMD_GET_MAX_BACKLIGHT_LEVEL		0x30
 struct sdvo_max_backlight_reply {
-    u8 max_value;
-    u8 default_value;
+	u8 max_value;
+	u8 default_value;
 } __attribute__((packed));
 
 #define SDVO_CMD_GET_BACKLIGHT_LEVEL			0x31
@@ -565,16 +565,16 @@
 
 #define SDVO_CMD_GET_AMBIENT_LIGHT			0x33
 struct sdvo_get_ambient_light_reply {
-    u16 trip_low;
-    u16 trip_high;
-    u16 value;
+	u16 trip_low;
+	u16 trip_high;
+	u16 value;
 } __attribute__((packed));
 #define SDVO_CMD_SET_AMBIENT_LIGHT			0x34
 struct sdvo_set_ambient_light_reply {
-    u16 trip_low;
-    u16 trip_high;
-    unsigned int enable:1;
-    unsigned int pad:7;
+	u16 trip_low;
+	u16 trip_high;
+	unsigned int enable:1;
+	unsigned int pad:7;
 } __attribute__((packed));
 
 /* Set display power state */
@@ -586,23 +586,23 @@
 
 #define SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS		0x84
 struct intel_sdvo_enhancements_reply {
-    unsigned int flicker_filter:1;
-    unsigned int flicker_filter_adaptive:1;
-    unsigned int flicker_filter_2d:1;
-    unsigned int saturation:1;
-    unsigned int hue:1;
-    unsigned int brightness:1;
-    unsigned int contrast:1;
-    unsigned int overscan_h:1;
+	unsigned int flicker_filter:1;
+	unsigned int flicker_filter_adaptive:1;
+	unsigned int flicker_filter_2d:1;
+	unsigned int saturation:1;
+	unsigned int hue:1;
+	unsigned int brightness:1;
+	unsigned int contrast:1;
+	unsigned int overscan_h:1;
 
-    unsigned int overscan_v:1;
-    unsigned int hpos:1;
-    unsigned int vpos:1;
-    unsigned int sharpness:1;
-    unsigned int dot_crawl:1;
-    unsigned int dither:1;
-    unsigned int tv_chroma_filter:1;
-    unsigned int tv_luma_filter:1;
+	unsigned int overscan_v:1;
+	unsigned int hpos:1;
+	unsigned int vpos:1;
+	unsigned int sharpness:1;
+	unsigned int dot_crawl:1;
+	unsigned int dither:1;
+	unsigned int tv_chroma_filter:1;
+	unsigned int tv_luma_filter:1;
 } __attribute__((packed));
 
 /* Picture enhancement limits below are dependent on the current TV format,
@@ -623,8 +623,8 @@
 #define SDVO_CMD_GET_MAX_TV_CHROMA_FILTER		0x74
 #define SDVO_CMD_GET_MAX_TV_LUMA_FILTER			0x77
 struct intel_sdvo_enhancement_limits_reply {
-    u16 max_value;
-    u16 default_value;
+	u16 max_value;
+	u16 default_value;
 } __attribute__((packed));
 
 #define SDVO_CMD_GET_LVDS_PANEL_INFORMATION		0x7f
@@ -665,8 +665,8 @@
 #define SDVO_CMD_GET_TV_LUMA_FILTER			0x78
 #define SDVO_CMD_SET_TV_LUMA_FILTER			0x79
 struct intel_sdvo_enhancements_arg {
-    u16 value;
-}__attribute__((packed));
+	u16 value;
+} __attribute__((packed));
 
 #define SDVO_CMD_GET_DOT_CRAWL				0x70
 #define SDVO_CMD_SET_DOT_CRAWL				0x71
@@ -717,7 +717,7 @@
 #define SDVO_CMD_GET_AUDIO_TX_INFO	0x9c
 #define SDVO_NEED_TO_STALL  (1 << 7)
 
-struct intel_sdvo_encode{
-    u8 dvi_rev;
-    u8 hdmi_rev;
+struct intel_sdvo_encode {
+	u8 dvi_rev;
+	u8 hdmi_rev;
 } __attribute__ ((packed));
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 210d570..f3c6a9a 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -194,10 +194,10 @@
  *
  *     if (f >= 1) {
  *         exp = 0x7;
- * 	   mant = 1 << 8;
+ *	   mant = 1 << 8;
  *     } else {
  *         for (exp = 0; exp < 3 && f < 0.5; exp++)
- * 	       f *= 2.0;
+ *	   f *= 2.0;
  *         mant = (f * (1 << 9) + 0.5);
  *         if (mant >= (1 << 9))
  *             mant = (1 << 9) - 1;
@@ -430,7 +430,7 @@
 		.vsync_start_f1	= 6,		    .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,		    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,		    .veq_len		= 18,
 
 		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
@@ -472,7 +472,7 @@
 		.vsync_start_f1 = 6,		    .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,		    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,		    .veq_len		= 18,
 
 		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
@@ -515,7 +515,7 @@
 		.vsync_start_f1	= 6,	    .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena      = true,	    .veq_start_f1	= 0,
 		.veq_start_f2 = 1,	    .veq_len		= 18,
 
 		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
@@ -558,7 +558,7 @@
 		.vsync_start_f1	= 6,		    .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,		    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,		    .veq_len		= 18,
 
 		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
@@ -602,14 +602,14 @@
 		.vsync_start_f1	= 6,	   .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,		    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,		    .veq_len		= 18,
 
 		.vi_end_f1	= 24,		    .vi_end_f2		= 25,
 		.nbr_end	= 286,
 
 		.burst_ena	= true,
-		.hburst_start = 73,	    	    .hburst_len		= 34,
+		.hburst_start = 73,	    .hburst_len		= 34,
 		.vburst_start_f1 = 8,	    .vburst_end_f1	= 285,
 		.vburst_start_f2 = 8,	    .vburst_end_f2	= 286,
 		.vburst_start_f3 = 9,	    .vburst_end_f3	= 286,
@@ -646,7 +646,7 @@
 		.vsync_start_f1	= 5,	    .vsync_start_f2	= 6,
 		.vsync_len	= 5,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,	    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,	    .veq_len		= 15,
 
 		.vi_end_f1	= 24,		    .vi_end_f2		= 25,
@@ -675,7 +675,7 @@
 	},
 	{
 		.name       = "480p@59.94Hz",
-		.clock 	= 107520,
+		.clock		= 107520,
 		.refresh	= 59940,
 		.oversample     = TV_OVERSAMPLE_4X,
 		.component_only = 1,
@@ -683,7 +683,7 @@
 		.hsync_end      = 64,               .hblank_end         = 122,
 		.hblank_start   = 842,              .htotal             = 857,
 
-		.progressive    = true,.trilevel_sync = false,
+		.progressive    = true,		    .trilevel_sync = false,
 
 		.vsync_start_f1 = 12,               .vsync_start_f2     = 12,
 		.vsync_len      = 12,
@@ -699,7 +699,7 @@
 	},
 	{
 		.name       = "480p@60Hz",
-		.clock 	= 107520,
+		.clock		= 107520,
 		.refresh	= 60000,
 		.oversample     = TV_OVERSAMPLE_4X,
 		.component_only = 1,
@@ -707,7 +707,7 @@
 		.hsync_end      = 64,               .hblank_end         = 122,
 		.hblank_start   = 842,              .htotal             = 856,
 
-		.progressive    = true,.trilevel_sync = false,
+		.progressive    = true,		    .trilevel_sync = false,
 
 		.vsync_start_f1 = 12,               .vsync_start_f2     = 12,
 		.vsync_len      = 12,
@@ -723,7 +723,7 @@
 	},
 	{
 		.name       = "576p",
-		.clock 	= 107520,
+		.clock		= 107520,
 		.refresh	= 50000,
 		.oversample     = TV_OVERSAMPLE_4X,
 		.component_only = 1,
@@ -755,7 +755,7 @@
 		.hsync_end      = 80,               .hblank_end         = 300,
 		.hblank_start   = 1580,             .htotal             = 1649,
 
-		.progressive    = true, 	    .trilevel_sync = true,
+		.progressive	= true,		    .trilevel_sync = true,
 
 		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
 		.vsync_len      = 10,
@@ -779,7 +779,7 @@
 		.hsync_end      = 80,               .hblank_end         = 300,
 		.hblank_start   = 1580,             .htotal             = 1651,
 
-		.progressive    = true, 	    .trilevel_sync = true,
+		.progressive	= true,		    .trilevel_sync = true,
 
 		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
 		.vsync_len      = 10,
@@ -803,7 +803,7 @@
 		.hsync_end      = 80,               .hblank_end         = 300,
 		.hblank_start   = 1580,             .htotal             = 1979,
 
-		.progressive    = true, 	        .trilevel_sync = true,
+		.progressive	= true,		    .trilevel_sync = true,
 
 		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
 		.vsync_len      = 10,
@@ -828,12 +828,12 @@
 		.hsync_end      = 88,               .hblank_end         = 235,
 		.hblank_start   = 2155,             .htotal             = 2639,
 
-		.progressive    = false, 	    .trilevel_sync = true,
+		.progressive	= false,	  .trilevel_sync = true,
 
 		.vsync_start_f1 = 4,              .vsync_start_f2     = 5,
 		.vsync_len      = 10,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 4,
+		.veq_ena	= true,	    .veq_start_f1	= 4,
 		.veq_start_f2   = 4,	    .veq_len		= 10,
 
 
@@ -854,12 +854,12 @@
 		.hsync_end      = 88,               .hblank_end         = 235,
 		.hblank_start   = 2155,             .htotal             = 2199,
 
-		.progressive    = false, 	    .trilevel_sync = true,
+		.progressive	= false,	    .trilevel_sync = true,
 
 		.vsync_start_f1 = 4,               .vsync_start_f2     = 5,
 		.vsync_len      = 10,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 4,
+		.veq_ena	= true,		    .veq_start_f1	= 4,
 		.veq_start_f2	= 4,		    .veq_len		= 10,
 
 
@@ -880,16 +880,16 @@
 		.hsync_end      = 88,               .hblank_end         = 235,
 		.hblank_start   = 2155,             .htotal             = 2201,
 
-		.progressive    = false, 	    .trilevel_sync = true,
+		.progressive	= false,	    .trilevel_sync = true,
 
 		.vsync_start_f1 = 4,            .vsync_start_f2    = 5,
 		.vsync_len      = 10,
 
 		.veq_ena	= true,		    .veq_start_f1	= 4,
-		.veq_start_f2 = 4,	    	    .veq_len = 10,
+		.veq_start_f2	= 4,		.veq_len	  = 10,
 
 
-		.vi_end_f1      = 21,           .vi_end_f2         	= 22,
+		.vi_end_f1	= 21,		.vi_end_f2	  = 22,
 		.nbr_end        = 539,
 
 		.burst_ena      = false,
@@ -916,7 +916,7 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	switch(mode) {
+	switch (mode) {
 	case DRM_MODE_DPMS_ON:
 		I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE);
 		break;
@@ -933,7 +933,7 @@
 {
 	int i;
 
-	for (i = 0; i < sizeof(tv_modes) / sizeof (tv_modes[0]); i++) {
+	for (i = 0; i < sizeof(tv_modes) / sizeof(tv_modes[0]); i++) {
 		const struct tv_mode *tv_mode = &tv_modes[i];
 
 		if (!strcmp(tv_format, tv_mode->name))
@@ -1128,7 +1128,7 @@
 	if (color_conversion) {
 		I915_WRITE(TV_CSC_Y, (color_conversion->ry << 16) |
 			   color_conversion->gy);
-		I915_WRITE(TV_CSC_Y2,(color_conversion->by << 16) |
+		I915_WRITE(TV_CSC_Y2, (color_conversion->by << 16) |
 			   color_conversion->ay);
 		I915_WRITE(TV_CSC_U, (color_conversion->ru << 16) |
 			   color_conversion->gu);
@@ -1232,7 +1232,7 @@
  * \return false if TV is disconnected.
  */
 static int
-intel_tv_detect_type (struct intel_tv *intel_tv,
+intel_tv_detect_type(struct intel_tv *intel_tv,
 		      struct drm_connector *connector)
 {
 	struct drm_encoder *encoder = &intel_tv->base.base;
@@ -1486,7 +1486,7 @@
 }
 
 static void
-intel_tv_destroy (struct drm_connector *connector)
+intel_tv_destroy(struct drm_connector *connector)
 {
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index 0583677..35ef5b1 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -21,16 +21,17 @@
              nv40_grctx.o nv50_grctx.o nvc0_grctx.o \
              nv84_crypt.o \
              nva3_copy.o nvc0_copy.o \
-             nv40_mpeg.o nv50_mpeg.o \
+             nv31_mpeg.o nv50_mpeg.o \
              nv04_instmem.o nv50_instmem.o nvc0_instmem.o \
-             nv50_evo.o nv50_crtc.o nv50_dac.o nv50_sor.o \
-             nv50_cursor.o nv50_display.o \
              nv04_dac.o nv04_dfp.o nv04_tv.o nv17_tv.o nv17_tv_modes.o \
              nv04_crtc.o nv04_display.o nv04_cursor.o \
+             nv50_evo.o nv50_crtc.o nv50_dac.o nv50_sor.o \
+             nv50_cursor.o nv50_display.o \
+             nvd0_display.o \
              nv04_fbcon.o nv50_fbcon.o nvc0_fbcon.o \
              nv10_gpio.o nv50_gpio.o \
 	     nv50_calc.o \
-	     nv04_pm.o nv50_pm.o nva3_pm.o \
+	     nv04_pm.o nv40_pm.o nv50_pm.o nva3_pm.o nvc0_pm.o \
 	     nv50_vram.o nvc0_vram.o \
 	     nv50_vm.o nvc0_vm.o
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index 00a55df..fa22b28 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -37,8 +37,10 @@
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
 #include "nouveau_reg.h"
+#include "nouveau_encoder.h"
 
-static int nv40_get_intensity(struct backlight_device *bd)
+static int
+nv40_get_intensity(struct backlight_device *bd)
 {
 	struct drm_device *dev = bl_get_data(bd);
 	int val = (nv_rd32(dev, NV40_PMC_BACKLIGHT) & NV40_PMC_BACKLIGHT_MASK)
@@ -47,7 +49,8 @@
 	return val;
 }
 
-static int nv40_set_intensity(struct backlight_device *bd)
+static int
+nv40_set_intensity(struct backlight_device *bd)
 {
 	struct drm_device *dev = bl_get_data(bd);
 	int val = bd->props.brightness;
@@ -65,30 +68,8 @@
 	.update_status = nv40_set_intensity,
 };
 
-static int nv50_get_intensity(struct backlight_device *bd)
-{
-	struct drm_device *dev = bl_get_data(bd);
-
-	return nv_rd32(dev, NV50_PDISPLAY_SOR_BACKLIGHT);
-}
-
-static int nv50_set_intensity(struct backlight_device *bd)
-{
-	struct drm_device *dev = bl_get_data(bd);
-	int val = bd->props.brightness;
-
-	nv_wr32(dev, NV50_PDISPLAY_SOR_BACKLIGHT,
-		val | NV50_PDISPLAY_SOR_BACKLIGHT_ENABLE);
-	return 0;
-}
-
-static const struct backlight_ops nv50_bl_ops = {
-	.options = BL_CORE_SUSPENDRESUME,
-	.get_brightness = nv50_get_intensity,
-	.update_status = nv50_set_intensity,
-};
-
-static int nouveau_nv40_backlight_init(struct drm_connector *connector)
+static int
+nv40_backlight_init(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -113,34 +94,129 @@
 	return 0;
 }
 
-static int nouveau_nv50_backlight_init(struct drm_connector *connector)
+static int
+nv50_get_intensity(struct backlight_device *bd)
+{
+	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
+	struct drm_device *dev = nv_encoder->base.base.dev;
+	int or = nv_encoder->or;
+	u32 div = 1025;
+	u32 val;
+
+	val  = nv_rd32(dev, NV50_PDISP_SOR_PWM_CTL(or));
+	val &= NV50_PDISP_SOR_PWM_CTL_VAL;
+	return ((val * 100) + (div / 2)) / div;
+}
+
+static int
+nv50_set_intensity(struct backlight_device *bd)
+{
+	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
+	struct drm_device *dev = nv_encoder->base.base.dev;
+	int or = nv_encoder->or;
+	u32 div = 1025;
+	u32 val = (bd->props.brightness * div) / 100;
+
+	nv_wr32(dev, NV50_PDISP_SOR_PWM_CTL(or),
+		     NV50_PDISP_SOR_PWM_CTL_NEW | val);
+	return 0;
+}
+
+static const struct backlight_ops nv50_bl_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.get_brightness = nv50_get_intensity,
+	.update_status = nv50_set_intensity,
+};
+
+static int
+nva3_get_intensity(struct backlight_device *bd)
+{
+	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
+	struct drm_device *dev = nv_encoder->base.base.dev;
+	int or = nv_encoder->or;
+	u32 div, val;
+
+	div  = nv_rd32(dev, NV50_PDISP_SOR_PWM_DIV(or));
+	val  = nv_rd32(dev, NV50_PDISP_SOR_PWM_CTL(or));
+	val &= NVA3_PDISP_SOR_PWM_CTL_VAL;
+	if (div && div >= val)
+		return ((val * 100) + (div / 2)) / div;
+
+	return 100;
+}
+
+static int
+nva3_set_intensity(struct backlight_device *bd)
+{
+	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
+	struct drm_device *dev = nv_encoder->base.base.dev;
+	int or = nv_encoder->or;
+	u32 div, val;
+
+	div = nv_rd32(dev, NV50_PDISP_SOR_PWM_DIV(or));
+	val = (bd->props.brightness * div) / 100;
+	if (div) {
+		nv_wr32(dev, NV50_PDISP_SOR_PWM_CTL(or), val |
+			     NV50_PDISP_SOR_PWM_CTL_NEW |
+			     NVA3_PDISP_SOR_PWM_CTL_UNK);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static const struct backlight_ops nva3_bl_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.get_brightness = nva3_get_intensity,
+	.update_status = nva3_set_intensity,
+};
+
+static int
+nv50_backlight_init(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_encoder *nv_encoder;
 	struct backlight_properties props;
 	struct backlight_device *bd;
+	const struct backlight_ops *ops;
 
-	if (!nv_rd32(dev, NV50_PDISPLAY_SOR_BACKLIGHT))
+	nv_encoder = find_encoder(connector, OUTPUT_LVDS);
+	if (!nv_encoder) {
+		nv_encoder = find_encoder(connector, OUTPUT_DP);
+		if (!nv_encoder)
+			return -ENODEV;
+	}
+
+	if (!nv_rd32(dev, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or)))
 		return 0;
 
+	if (dev_priv->chipset <= 0xa0 ||
+	    dev_priv->chipset == 0xaa ||
+	    dev_priv->chipset == 0xac)
+		ops = &nv50_bl_ops;
+	else
+		ops = &nva3_bl_ops;
+
 	memset(&props, 0, sizeof(struct backlight_properties));
 	props.type = BACKLIGHT_RAW;
-	props.max_brightness = 1025;
-	bd = backlight_device_register("nv_backlight", &connector->kdev, dev,
-				       &nv50_bl_ops, &props);
+	props.max_brightness = 100;
+	bd = backlight_device_register("nv_backlight", &connector->kdev,
+				       nv_encoder, ops, &props);
 	if (IS_ERR(bd))
 		return PTR_ERR(bd);
 
 	dev_priv->backlight = bd;
-	bd->props.brightness = nv50_get_intensity(bd);
+	bd->props.brightness = bd->ops->get_brightness(bd);
 	backlight_update_status(bd);
 	return 0;
 }
 
-int nouveau_backlight_init(struct drm_connector *connector)
+int
+nouveau_backlight_init(struct drm_device *dev)
 {
-	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct drm_connector *connector;
 
 #ifdef CONFIG_ACPI
 	if (acpi_video_backlight_support()) {
@@ -150,21 +226,28 @@
 	}
 #endif
 
-	switch (dev_priv->card_type) {
-	case NV_40:
-		return nouveau_nv40_backlight_init(connector);
-	case NV_50:
-		return nouveau_nv50_backlight_init(connector);
-	default:
-		break;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS &&
+		    connector->connector_type != DRM_MODE_CONNECTOR_eDP)
+			continue;
+
+		switch (dev_priv->card_type) {
+		case NV_40:
+			return nv40_backlight_init(connector);
+		case NV_50:
+			return nv50_backlight_init(connector);
+		default:
+			break;
+		}
 	}
 
+
 	return 0;
 }
 
-void nouveau_backlight_exit(struct drm_connector *connector)
+void
+nouveau_backlight_exit(struct drm_device *dev)
 {
-	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
 	if (dev_priv->backlight) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index b311fab..032a820 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -296,6 +296,11 @@
 	if (dev_priv->card_type < NV_50)
 		return reg;
 
+	if (reg & 0x80000000) {
+		BUG_ON(bios->display.crtc < 0);
+		reg += bios->display.crtc * 0x800;
+	}
+
 	if (reg & 0x40000000) {
 		BUG_ON(!dcbent);
 
@@ -304,7 +309,7 @@
 			reg += 0x00000080;
 	}
 
-	reg &= ~0x60000000;
+	reg &= ~0xe0000000;
 	return reg;
 }
 
@@ -1174,22 +1179,19 @@
 	 *
 	 */
 
-	struct bit_displayport_encoder_table *dpe = NULL;
 	struct dcb_entry *dcb = bios->display.output;
 	struct drm_device *dev = bios->dev;
 	uint8_t cond = bios->data[offset + 1];
-	int dummy;
+	uint8_t *table, *entry;
 
 	BIOSLOG(bios, "0x%04X: subop 0x%02X\n", offset, cond);
 
 	if (!iexec->execute)
 		return 3;
 
-	dpe = nouveau_bios_dp_table(dev, dcb, &dummy);
-	if (!dpe) {
-		NV_ERROR(dev, "0x%04X: INIT_3A: no encoder table!!\n", offset);
+	table = nouveau_dp_bios_data(dev, dcb, &entry);
+	if (!table)
 		return 3;
-	}
 
 	switch (cond) {
 	case 0:
@@ -1203,7 +1205,7 @@
 		break;
 	case 1:
 	case 2:
-		if (!(dpe->unknown & cond))
+		if (!(entry[5] & cond))
 			iexec->execute = false;
 		break;
 	case 5:
@@ -3221,6 +3223,49 @@
 	return 1;
 }
 
+static void
+init_gpio_unknv50(struct nvbios *bios, struct dcb_gpio_entry *gpio)
+{
+	const uint32_t nv50_gpio_ctl[2] = { 0xe100, 0xe28c };
+	u32 r, s, v;
+
+	/* Not a clue, needs de-magicing */
+	r = nv50_gpio_ctl[gpio->line >> 4];
+	s = (gpio->line & 0x0f);
+	v = bios_rd32(bios, r) & ~(0x00010001 << s);
+	switch ((gpio->entry & 0x06000000) >> 25) {
+	case 1:
+		v |= (0x00000001 << s);
+		break;
+	case 2:
+		v |= (0x00010000 << s);
+		break;
+	default:
+		break;
+	}
+
+	bios_wr32(bios, r, v);
+}
+
+static void
+init_gpio_unknvd0(struct nvbios *bios, struct dcb_gpio_entry *gpio)
+{
+	u32 v, i;
+
+	v  = bios_rd32(bios, 0x00d610 + (gpio->line * 4));
+	v &= 0xffffff00;
+	v |= (gpio->entry & 0x00ff0000) >> 16;
+	bios_wr32(bios, 0x00d610 + (gpio->line * 4), v);
+
+	i = (gpio->entry & 0x1f000000) >> 24;
+	if (i) {
+		v  = bios_rd32(bios, 0x00d640 + ((i - 1) * 4));
+		v &= 0xffffff00;
+		v |= gpio->line;
+		bios_wr32(bios, 0x00d640 + ((i - 1) * 4), v);
+	}
+}
+
 static int
 init_gpio(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
@@ -3235,7 +3280,6 @@
 
 	struct drm_nouveau_private *dev_priv = bios->dev->dev_private;
 	struct nouveau_gpio_engine *pgpio = &dev_priv->engine.gpio;
-	const uint32_t nv50_gpio_ctl[2] = { 0xe100, 0xe28c };
 	int i;
 
 	if (dev_priv->card_type < NV_50) {
@@ -3248,33 +3292,20 @@
 
 	for (i = 0; i < bios->dcb.gpio.entries; i++) {
 		struct dcb_gpio_entry *gpio = &bios->dcb.gpio.entry[i];
-		uint32_t r, s, v;
 
 		BIOSLOG(bios, "0x%04X: Entry: 0x%08X\n", offset, gpio->entry);
 
 		BIOSLOG(bios, "0x%04X: set gpio 0x%02x, state %d\n",
 			offset, gpio->tag, gpio->state_default);
-		if (bios->execute)
-			pgpio->set(bios->dev, gpio->tag, gpio->state_default);
 
-		/* The NVIDIA binary driver doesn't appear to actually do
-		 * any of this, my VBIOS does however.
-		 */
-		/* Not a clue, needs de-magicing */
-		r = nv50_gpio_ctl[gpio->line >> 4];
-		s = (gpio->line & 0x0f);
-		v = bios_rd32(bios, r) & ~(0x00010001 << s);
-		switch ((gpio->entry & 0x06000000) >> 25) {
-		case 1:
-			v |= (0x00000001 << s);
-			break;
-		case 2:
-			v |= (0x00010000 << s);
-			break;
-		default:
-			break;
-		}
-		bios_wr32(bios, r, v);
+		if (!bios->execute)
+			continue;
+
+		pgpio->set(bios->dev, gpio->tag, gpio->state_default);
+		if (dev_priv->card_type < NV_D0)
+			init_gpio_unknv50(bios, gpio);
+		else
+			init_gpio_unknvd0(bios, gpio);
 	}
 
 	return 1;
@@ -3737,6 +3768,10 @@
 	int count = 0, i, ret;
 	uint8_t id;
 
+	/* catch NULL script pointers */
+	if (offset == 0)
+		return 0;
+
 	/*
 	 * Loop until INIT_DONE causes us to break out of the loop
 	 * (or until offset > bios length just in case... )
@@ -4389,86 +4424,37 @@
 	return 0;
 }
 
-static uint8_t *
-bios_output_config_match(struct drm_device *dev, struct dcb_entry *dcbent,
-			 uint16_t record, int record_len, int record_nr,
-			 bool match_link)
+/* BIT 'U'/'d' table encoder subtables have hashes matching them to
+ * a particular set of encoders.
+ *
+ * This function returns true if a particular DCB entry matches.
+ */
+bool
+bios_encoder_match(struct dcb_entry *dcb, u32 hash)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->vbios;
-	uint32_t entry;
-	uint16_t table;
-	int i, v;
+	if ((hash & 0x000000f0) != (dcb->location << 4))
+		return false;
+	if ((hash & 0x0000000f) != dcb->type)
+		return false;
+	if (!(hash & (dcb->or << 16)))
+		return false;
 
-	switch (dcbent->type) {
+	switch (dcb->type) {
 	case OUTPUT_TMDS:
 	case OUTPUT_LVDS:
 	case OUTPUT_DP:
-		break;
-	default:
-		match_link = false;
-		break;
-	}
-
-	for (i = 0; i < record_nr; i++, record += record_len) {
-		table = ROM16(bios->data[record]);
-		if (!table)
-			continue;
-		entry = ROM32(bios->data[table]);
-
-		if (match_link) {
-			v = (entry & 0x00c00000) >> 22;
-			if (!(v & dcbent->sorconf.link))
-				continue;
+		if (hash & 0x00c00000) {
+			if (!(hash & (dcb->sorconf.link << 22)))
+				return false;
 		}
-
-		v = (entry & 0x000f0000) >> 16;
-		if (!(v & dcbent->or))
-			continue;
-
-		v = (entry & 0x000000f0) >> 4;
-		if (v != dcbent->location)
-			continue;
-
-		v = (entry & 0x0000000f);
-		if (v != dcbent->type)
-			continue;
-
-		return &bios->data[table];
+	default:
+		return true;
 	}
-
-	return NULL;
-}
-
-void *
-nouveau_bios_dp_table(struct drm_device *dev, struct dcb_entry *dcbent,
-		      int *length)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->vbios;
-	uint8_t *table;
-
-	if (!bios->display.dp_table_ptr) {
-		NV_ERROR(dev, "No pointer to DisplayPort table\n");
-		return NULL;
-	}
-	table = &bios->data[bios->display.dp_table_ptr];
-
-	if (table[0] != 0x20 && table[0] != 0x21) {
-		NV_ERROR(dev, "DisplayPort table version 0x%02x unknown\n",
-			 table[0]);
-		return NULL;
-	}
-
-	*length = table[4];
-	return bios_output_config_match(dev, dcbent,
-					bios->display.dp_table_ptr + table[1],
-					table[2], table[3], table[0] >= 0x21);
 }
 
 int
-nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
-			       uint32_t sub, int pxclk)
+nouveau_bios_run_display_table(struct drm_device *dev, u16 type, int pclk,
+			       struct dcb_entry *dcbent, int crtc)
 {
 	/*
 	 * The display script table is located by the BIT 'U' table.
@@ -4498,7 +4484,7 @@
 	uint8_t *table = &bios->data[bios->display.script_table_ptr];
 	uint8_t *otable = NULL;
 	uint16_t script;
-	int i = 0;
+	int i;
 
 	if (!bios->display.script_table_ptr) {
 		NV_ERROR(dev, "No pointer to output script table\n");
@@ -4550,30 +4536,33 @@
 
 	NV_DEBUG_KMS(dev, "Searching for output entry for %d %d %d\n",
 			dcbent->type, dcbent->location, dcbent->or);
-	otable = bios_output_config_match(dev, dcbent, table[1] +
-					  bios->display.script_table_ptr,
-					  table[2], table[3], table[0] >= 0x21);
+	for (i = 0; i < table[3]; i++) {
+		otable = ROMPTR(bios, table[table[1] + (i * table[2])]);
+		if (otable && bios_encoder_match(dcbent, ROM32(otable[0])))
+			break;
+	}
+
 	if (!otable) {
 		NV_DEBUG_KMS(dev, "failed to match any output table\n");
 		return 1;
 	}
 
-	if (pxclk < -2 || pxclk > 0) {
+	if (pclk < -2 || pclk > 0) {
 		/* Try to find matching script table entry */
 		for (i = 0; i < otable[5]; i++) {
-			if (ROM16(otable[table[4] + i*6]) == sub)
+			if (ROM16(otable[table[4] + i*6]) == type)
 				break;
 		}
 
 		if (i == otable[5]) {
 			NV_ERROR(dev, "Table 0x%04x not found for %d/%d, "
 				      "using first\n",
-				 sub, dcbent->type, dcbent->or);
+				 type, dcbent->type, dcbent->or);
 			i = 0;
 		}
 	}
 
-	if (pxclk == 0) {
+	if (pclk == 0) {
 		script = ROM16(otable[6]);
 		if (!script) {
 			NV_DEBUG_KMS(dev, "output script 0 not found\n");
@@ -4581,9 +4570,9 @@
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing output script 0\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	} else
-	if (pxclk == -1) {
+	if (pclk == -1) {
 		script = ROM16(otable[8]);
 		if (!script) {
 			NV_DEBUG_KMS(dev, "output script 1 not found\n");
@@ -4591,9 +4580,9 @@
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing output script 1\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	} else
-	if (pxclk == -2) {
+	if (pclk == -2) {
 		if (table[4] >= 12)
 			script = ROM16(otable[10]);
 		else
@@ -4604,31 +4593,31 @@
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing output script 2\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	} else
-	if (pxclk > 0) {
+	if (pclk > 0) {
 		script = ROM16(otable[table[4] + i*6 + 2]);
 		if (script)
-			script = clkcmptable(bios, script, pxclk);
+			script = clkcmptable(bios, script, pclk);
 		if (!script) {
 			NV_DEBUG_KMS(dev, "clock script 0 not found\n");
 			return 1;
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing clock script 0\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	} else
-	if (pxclk < 0) {
+	if (pclk < 0) {
 		script = ROM16(otable[table[4] + i*6 + 4]);
 		if (script)
-			script = clkcmptable(bios, script, -pxclk);
+			script = clkcmptable(bios, script, -pclk);
 		if (!script) {
 			NV_DEBUG_KMS(dev, "clock script 1 not found\n");
 			return 1;
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing clock script 1\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	}
 
 	return 0;
@@ -5478,14 +5467,6 @@
 	return 0;
 }
 
-static int
-parse_bit_displayport_tbl_entry(struct drm_device *dev, struct nvbios *bios,
-				struct bit_entry *bitentry)
-{
-	bios->display.dp_table_ptr = ROM16(bios->data[bitentry->offset]);
-	return 0;
-}
-
 struct bit_table {
 	const char id;
 	int (* const parse_fn)(struct drm_device *, struct nvbios *, struct bit_entry *);
@@ -5559,7 +5540,6 @@
 	parse_bit_table(bios, bitoffset, &BIT_TABLE('L', lvds));
 	parse_bit_table(bios, bitoffset, &BIT_TABLE('T', tmds));
 	parse_bit_table(bios, bitoffset, &BIT_TABLE('U', U));
-	parse_bit_table(bios, bitoffset, &BIT_TABLE('d', displayport));
 
 	return 0;
 }
@@ -5884,9 +5864,15 @@
 			}
 
 			e->line = (e->entry & 0x0000001f) >> 0;
-			e->state_default = (e->entry & 0x01000000) >> 24;
-			e->state[0] = (e->entry & 0x18000000) >> 27;
-			e->state[1] = (e->entry & 0x60000000) >> 29;
+			if (gpio[0] == 0x40) {
+				e->state_default = (e->entry & 0x01000000) >> 24;
+				e->state[0] = (e->entry & 0x18000000) >> 27;
+				e->state[1] = (e->entry & 0x60000000) >> 29;
+			} else {
+				e->state_default = (e->entry & 0x00000080) >> 7;
+				e->state[0] = (entry[4] >> 4) & 3;
+				e->state[1] = (entry[4] >> 6) & 3;
+			}
 		}
 	}
 
@@ -6156,7 +6142,14 @@
 	}
 	case OUTPUT_DP:
 		entry->dpconf.sor.link = (conf & 0x00000030) >> 4;
-		entry->dpconf.link_bw = (conf & 0x00e00000) >> 21;
+		switch ((conf & 0x00e00000) >> 21) {
+		case 0:
+			entry->dpconf.link_bw = 162000;
+			break;
+		default:
+			entry->dpconf.link_bw = 270000;
+			break;
+		}
 		switch ((conf & 0x0f000000) >> 24) {
 		case 0xf:
 			entry->dpconf.link_nr = 4;
@@ -6769,7 +6762,7 @@
 
 void
 nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table,
-			    struct dcb_entry *dcbent)
+			    struct dcb_entry *dcbent, int crtc)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nvbios *bios = &dev_priv->vbios;
@@ -6777,11 +6770,22 @@
 
 	spin_lock_bh(&bios->lock);
 	bios->display.output = dcbent;
+	bios->display.crtc = crtc;
 	parse_init_table(bios, table, &iexec);
 	bios->display.output = NULL;
 	spin_unlock_bh(&bios->lock);
 }
 
+void
+nouveau_bios_init_exec(struct drm_device *dev, uint16_t table)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvbios *bios = &dev_priv->vbios;
+	struct init_exec iexec = { true, false };
+
+	parse_init_table(bios, table, &iexec);
+}
+
 static bool NVInitVBIOS(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -6863,9 +6867,8 @@
 
 	if (dev_priv->card_type >= NV_50) {
 		for (i = 0; i < bios->dcb.entries; i++) {
-			nouveau_bios_run_display_table(dev,
-						       &bios->dcb.entry[i],
-						       0, 0);
+			nouveau_bios_run_display_table(dev, 0, 0,
+						       &bios->dcb.entry[i], -1);
 		}
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index 050c314..8adb69e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -289,8 +289,8 @@
 
 	struct {
 		struct dcb_entry *output;
+		int crtc;
 		uint16_t script_table_ptr;
-		uint16_t dp_table_ptr;
 	} display;
 
 	struct {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 890d50e..424dff5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -956,7 +956,7 @@
 			break;
 		}
 
-		if (dev_priv->card_type == NV_C0)
+		if (dev_priv->card_type >= NV_C0)
 			page_shift = node->page_shift;
 		else
 			page_shift = 12;
@@ -1104,7 +1104,8 @@
 	if (vma->node) {
 		if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) {
 			spin_lock(&nvbo->bo.bdev->fence_lock);
-			ttm_bo_wait(&nvbo->bo, false, false, false);
+			ttm_bo_wait(&nvbo->bo, false, false, false,
+				    TTM_USAGE_READWRITE);
 			spin_unlock(&nvbo->bo.bdev->fence_lock);
 			nouveau_vm_unmap(vma);
 		}
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index b0d753f..a319d56 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -411,13 +411,17 @@
 		return ret;
 	init->channel  = chan->id;
 
-	if (chan->dma.ib_max)
-		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM |
-					NOUVEAU_GEM_DOMAIN_GART;
-	else if (chan->pushbuf_bo->bo.mem.mem_type == TTM_PL_VRAM)
+	if (nouveau_vram_pushbuf == 0) {
+		if (chan->dma.ib_max)
+			init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM |
+						NOUVEAU_GEM_DOMAIN_GART;
+		else if (chan->pushbuf_bo->bo.mem.mem_type == TTM_PL_VRAM)
+			init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM;
+		else
+			init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_GART;
+	} else {
 		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM;
-	else
-		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_GART;
+	}
 
 	if (dev_priv->card_type < NV_C0) {
 		init->subchan[0].handle = NvM2MF;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 939d4df..e0d275e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -39,7 +39,7 @@
 
 static void nouveau_connector_hotplug(void *, int);
 
-static struct nouveau_encoder *
+struct nouveau_encoder *
 find_encoder(struct drm_connector *connector, int type)
 {
 	struct drm_device *dev = connector->dev;
@@ -116,10 +116,6 @@
 				      nouveau_connector_hotplug, connector);
 	}
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP)
-		nouveau_backlight_exit(connector);
-
 	kfree(nv_connector->edid);
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
@@ -712,11 +708,8 @@
 	case OUTPUT_TV:
 		return get_slave_funcs(encoder)->mode_valid(encoder, mode);
 	case OUTPUT_DP:
-		if (nv_encoder->dp.link_bw == DP_LINK_BW_2_7)
-			max_clock = nv_encoder->dp.link_nr * 270000;
-		else
-			max_clock = nv_encoder->dp.link_nr * 162000;
-
+		max_clock  = nv_encoder->dp.link_nr;
+		max_clock *= nv_encoder->dp.link_bw;
 		clock = clock * nouveau_connector_bpp(connector) / 8;
 		break;
 	default:
@@ -871,7 +864,6 @@
 					dev->mode_config.scaling_mode_property,
 					nv_connector->scaling_mode);
 		}
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 		/* fall-through */
 	case DCB_CONNECTOR_TV_0:
 	case DCB_CONNECTOR_TV_1:
@@ -888,27 +880,20 @@
 				dev->mode_config.dithering_mode_property,
 				nv_connector->use_dithering ?
 				DRM_MODE_DITHERING_ON : DRM_MODE_DITHERING_OFF);
-
-		if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS) {
-			if (dev_priv->card_type >= NV_50)
-				connector->polled = DRM_CONNECTOR_POLL_HPD;
-			else
-				connector->polled = DRM_CONNECTOR_POLL_CONNECT;
-		}
 		break;
 	}
 
-	if (pgpio->irq_register) {
+	if (nv_connector->dcb->gpio_tag != 0xff && pgpio->irq_register) {
 		pgpio->irq_register(dev, nv_connector->dcb->gpio_tag,
 				    nouveau_connector_hotplug, connector);
+
+		connector->polled = DRM_CONNECTOR_POLL_HPD;
+	} else {
+		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 	}
 
 	drm_sysfs_connector_add(connector);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP)
-		nouveau_backlight_init(connector);
-
 	dcb->drm = connector;
 	return dcb->drm;
 
@@ -925,22 +910,13 @@
 	struct drm_connector *connector = data;
 	struct drm_device *dev = connector->dev;
 
-	NV_INFO(dev, "%splugged %s\n", plugged ? "" : "un",
-		drm_get_connector_name(connector));
+	NV_DEBUG(dev, "%splugged %s\n", plugged ? "" : "un",
+		 drm_get_connector_name(connector));
 
-	if (connector->encoder && connector->encoder->crtc &&
-	    connector->encoder->crtc->enabled) {
-		struct nouveau_encoder *nv_encoder = nouveau_encoder(connector->encoder);
-		struct drm_encoder_helper_funcs *helper =
-			connector->encoder->helper_private;
-
-		if (nv_encoder->dcb->type == OUTPUT_DP) {
-			if (plugged)
-				helper->dpms(connector->encoder, DRM_MODE_DPMS_ON);
-			else
-				helper->dpms(connector->encoder, DRM_MODE_DPMS_OFF);
-		}
-	}
+	if (plugged)
+		drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+	else
+		drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
 
 	drm_helper_hpd_irq_event(dev);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_crtc.h b/drivers/gpu/drm/nouveau/nouveau_crtc.h
index cb1ce2a..bf8e128 100644
--- a/drivers/gpu/drm/nouveau/nouveau_crtc.h
+++ b/drivers/gpu/drm/nouveau/nouveau_crtc.h
@@ -82,14 +82,13 @@
 }
 
 int nv50_crtc_create(struct drm_device *dev, int index);
-int nv50_cursor_init(struct nouveau_crtc *);
-void nv50_cursor_fini(struct nouveau_crtc *);
 int nv50_crtc_cursor_set(struct drm_crtc *drm_crtc, struct drm_file *file_priv,
 			 uint32_t buffer_handle, uint32_t width,
 			 uint32_t height);
 int nv50_crtc_cursor_move(struct drm_crtc *drm_crtc, int x, int y);
 
 int nv04_cursor_init(struct nouveau_crtc *);
+int nv50_cursor_init(struct nouveau_crtc *);
 
 struct nouveau_connector *
 nouveau_crtc_connector_get(struct nouveau_crtc *crtc);
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index eb514ea..ddbabef 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -105,9 +105,12 @@
 		if (dev_priv->chipset == 0x50)
 			nv_fb->r_format |= (tile_flags << 8);
 
-		if (!tile_flags)
-			nv_fb->r_pitch = 0x00100000 | fb->pitch;
-		else {
+		if (!tile_flags) {
+			if (dev_priv->card_type < NV_D0)
+				nv_fb->r_pitch = 0x00100000 | fb->pitch;
+			else
+				nv_fb->r_pitch = 0x01000000 | fb->pitch;
+		} else {
 			u32 mode = nvbo->tile_mode;
 			if (dev_priv->card_type >= NV_C0)
 				mode >>= 4;
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 7beb82a..de5efe7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -28,418 +28,619 @@
 #include "nouveau_i2c.h"
 #include "nouveau_connector.h"
 #include "nouveau_encoder.h"
+#include "nouveau_crtc.h"
+
+/******************************************************************************
+ * aux channel util functions
+ *****************************************************************************/
+#define AUX_DBG(fmt, args...) do {                                             \
+	if (nouveau_reg_debug & NOUVEAU_REG_DEBUG_AUXCH) {                     \
+		NV_PRINTK(KERN_DEBUG, dev, "AUXCH(%d): " fmt, ch, ##args);     \
+	}                                                                      \
+} while (0)
+#define AUX_ERR(fmt, args...) NV_ERROR(dev, "AUXCH(%d): " fmt, ch, ##args)
+
+static void
+auxch_fini(struct drm_device *dev, int ch)
+{
+	nv_mask(dev, 0x00e4e4 + (ch * 0x50), 0x00310000, 0x00000000);
+}
 
 static int
-auxch_rd(struct drm_encoder *encoder, int address, uint8_t *buf, int size)
+auxch_init(struct drm_device *dev, int ch)
 {
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_i2c_chan *auxch;
-	int ret;
+	const u32 unksel = 1; /* nfi which to use, or if it matters.. */
+	const u32 ureq = unksel ? 0x00100000 : 0x00200000;
+	const u32 urep = unksel ? 0x01000000 : 0x02000000;
+	u32 ctrl, timeout;
 
-	auxch = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
-	if (!auxch)
-		return -ENODEV;
+	/* wait up to 1ms for any previous transaction to be done... */
+	timeout = 1000;
+	do {
+		ctrl = nv_rd32(dev, 0x00e4e4 + (ch * 0x50));
+		udelay(1);
+		if (!timeout--) {
+			AUX_ERR("begin idle timeout 0x%08x", ctrl);
+			return -EBUSY;
+		}
+	} while (ctrl & 0x03010000);
 
-	ret = nouveau_dp_auxch(auxch, 9, address, buf, size);
-	if (ret)
-		return ret;
+	/* set some magic, and wait up to 1ms for it to appear */
+	nv_mask(dev, 0x00e4e4 + (ch * 0x50), 0x00300000, ureq);
+	timeout = 1000;
+	do {
+		ctrl = nv_rd32(dev, 0x00e4e4 + (ch * 0x50));
+		udelay(1);
+		if (!timeout--) {
+			AUX_ERR("magic wait 0x%08x\n", ctrl);
+			auxch_fini(dev, ch);
+			return -EBUSY;
+		}
+	} while ((ctrl & 0x03000000) != urep);
 
 	return 0;
 }
 
 static int
-auxch_wr(struct drm_encoder *encoder, int address, uint8_t *buf, int size)
+auxch_tx(struct drm_device *dev, int ch, u8 type, u32 addr, u8 *data, u8 size)
 {
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_i2c_chan *auxch;
-	int ret;
+	u32 ctrl, stat, timeout, retries;
+	u32 xbuf[4] = {};
+	int ret, i;
 
-	auxch = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
-	if (!auxch)
-		return -ENODEV;
+	AUX_DBG("%d: 0x%08x %d\n", type, addr, size);
 
-	ret = nouveau_dp_auxch(auxch, 8, address, buf, size);
+	ret = auxch_init(dev, ch);
+	if (ret)
+		goto out;
+
+	stat = nv_rd32(dev, 0x00e4e8 + (ch * 0x50));
+	if (!(stat & 0x10000000)) {
+		AUX_DBG("sink not detected\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (!(type & 1)) {
+		memcpy(xbuf, data, size);
+		for (i = 0; i < 16; i += 4) {
+			AUX_DBG("wr 0x%08x\n", xbuf[i / 4]);
+			nv_wr32(dev, 0x00e4c0 + (ch * 0x50) + i, xbuf[i / 4]);
+		}
+	}
+
+	ctrl  = nv_rd32(dev, 0x00e4e4 + (ch * 0x50));
+	ctrl &= ~0x0001f0ff;
+	ctrl |= type << 12;
+	ctrl |= size - 1;
+	nv_wr32(dev, 0x00e4e0 + (ch * 0x50), addr);
+
+	/* retry transaction a number of times on failure... */
+	ret = -EREMOTEIO;
+	for (retries = 0; retries < 32; retries++) {
+		/* reset, and delay a while if this is a retry */
+		nv_wr32(dev, 0x00e4e4 + (ch * 0x50), 0x80000000 | ctrl);
+		nv_wr32(dev, 0x00e4e4 + (ch * 0x50), 0x00000000 | ctrl);
+		if (retries)
+			udelay(400);
+
+		/* transaction request, wait up to 1ms for it to complete */
+		nv_wr32(dev, 0x00e4e4 + (ch * 0x50), 0x00010000 | ctrl);
+
+		timeout = 1000;
+		do {
+			ctrl = nv_rd32(dev, 0x00e4e4 + (ch * 0x50));
+			udelay(1);
+			if (!timeout--) {
+				AUX_ERR("tx req timeout 0x%08x\n", ctrl);
+				goto out;
+			}
+		} while (ctrl & 0x00010000);
+
+		/* read status, and check if transaction completed ok */
+		stat = nv_mask(dev, 0x00e4e8 + (ch * 0x50), 0, 0);
+		if (!(stat & 0x000f0f00)) {
+			ret = 0;
+			break;
+		}
+
+		AUX_DBG("%02d 0x%08x 0x%08x\n", retries, ctrl, stat);
+	}
+
+	if (type & 1) {
+		for (i = 0; i < 16; i += 4) {
+			xbuf[i / 4] = nv_rd32(dev, 0x00e4d0 + (ch * 0x50) + i);
+			AUX_DBG("rd 0x%08x\n", xbuf[i / 4]);
+		}
+		memcpy(data, xbuf, size);
+	}
+
+out:
+	auxch_fini(dev, ch);
 	return ret;
 }
 
-static int
-nouveau_dp_lane_count_set(struct drm_encoder *encoder, uint8_t cmd)
+static u32
+dp_link_bw_get(struct drm_device *dev, int or, int link)
 {
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	uint32_t tmp;
-	int or = nv_encoder->or, link = !(nv_encoder->dcb->sorconf.link & 1);
-
-	tmp  = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
-	tmp &= ~(NV50_SOR_DP_CTRL_ENHANCED_FRAME_ENABLED |
-		 NV50_SOR_DP_CTRL_LANE_MASK);
-	tmp |= ((1 << (cmd & DP_LANE_COUNT_MASK)) - 1) << 16;
-	if (cmd & DP_LANE_COUNT_ENHANCED_FRAME_EN)
-		tmp |= NV50_SOR_DP_CTRL_ENHANCED_FRAME_ENABLED;
-	nv_wr32(dev, NV50_SOR_DP_CTRL(or, link), tmp);
-
-	return auxch_wr(encoder, DP_LANE_COUNT_SET, &cmd, 1);
+	u32 ctrl = nv_rd32(dev, 0x614300 + (or * 0x800));
+	if (!(ctrl & 0x000c0000))
+		return 162000;
+	return 270000;
 }
 
 static int
-nouveau_dp_link_bw_set(struct drm_encoder *encoder, uint8_t cmd)
+dp_lane_count_get(struct drm_device *dev, int or, int link)
 {
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	uint32_t tmp;
-	int reg = 0x614300 + (nv_encoder->or * 0x800);
+	u32 ctrl = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
+	switch (ctrl & 0x000f0000) {
+	case 0x00010000: return 1;
+	case 0x00030000: return 2;
+	default:
+		return 4;
+	}
+}
 
-	tmp  = nv_rd32(dev, reg);
-	tmp &= 0xfff3ffff;
-	if (cmd == DP_LINK_BW_2_7)
-		tmp |= 0x00040000;
-	nv_wr32(dev, reg, tmp);
+void
+nouveau_dp_tu_update(struct drm_device *dev, int or, int link, u32 clk, u32 bpp)
+{
+	const u32 symbol = 100000;
+	int bestTU = 0, bestVTUi = 0, bestVTUf = 0, bestVTUa = 0;
+	int TU, VTUi, VTUf, VTUa;
+	u64 link_data_rate, link_ratio, unk;
+	u32 best_diff = 64 * symbol;
+	u32 link_nr, link_bw, r;
 
-	return auxch_wr(encoder, DP_LINK_BW_SET, &cmd, 1);
+	/* calculate packed data rate for each lane */
+	link_nr = dp_lane_count_get(dev, or, link);
+	link_data_rate = (clk * bpp / 8) / link_nr;
+
+	/* calculate ratio of packed data rate to link symbol rate */
+	link_bw = dp_link_bw_get(dev, or, link);
+	link_ratio = link_data_rate * symbol;
+	r = do_div(link_ratio, link_bw);
+
+	for (TU = 64; TU >= 32; TU--) {
+		/* calculate average number of valid symbols in each TU */
+		u32 tu_valid = link_ratio * TU;
+		u32 calc, diff;
+
+		/* find a hw representation for the fraction.. */
+		VTUi = tu_valid / symbol;
+		calc = VTUi * symbol;
+		diff = tu_valid - calc;
+		if (diff) {
+			if (diff >= (symbol / 2)) {
+				VTUf = symbol / (symbol - diff);
+				if (symbol - (VTUf * diff))
+					VTUf++;
+
+				if (VTUf <= 15) {
+					VTUa  = 1;
+					calc += symbol - (symbol / VTUf);
+				} else {
+					VTUa  = 0;
+					VTUf  = 1;
+					calc += symbol;
+				}
+			} else {
+				VTUa  = 0;
+				VTUf  = min((int)(symbol / diff), 15);
+				calc += symbol / VTUf;
+			}
+
+			diff = calc - tu_valid;
+		} else {
+			/* no remainder, but the hw doesn't like the fractional
+			 * part to be zero.  decrement the integer part and
+			 * have the fraction add a whole symbol back
+			 */
+			VTUa = 0;
+			VTUf = 1;
+			VTUi--;
+		}
+
+		if (diff < best_diff) {
+			best_diff = diff;
+			bestTU = TU;
+			bestVTUa = VTUa;
+			bestVTUf = VTUf;
+			bestVTUi = VTUi;
+			if (diff == 0)
+				break;
+		}
+	}
+
+	if (!bestTU) {
+		NV_ERROR(dev, "DP: unable to find suitable config\n");
+		return;
+	}
+
+	/* XXX close to vbios numbers, but not right */
+	unk  = (symbol - link_ratio) * bestTU;
+	unk *= link_ratio;
+	r = do_div(unk, symbol);
+	r = do_div(unk, symbol);
+	unk += 6;
+
+	nv_mask(dev, NV50_SOR_DP_CTRL(or, link), 0x000001fc, bestTU << 2);
+	nv_mask(dev, NV50_SOR_DP_SCFG(or, link), 0x010f7f3f, bestVTUa << 24 |
+							     bestVTUf << 16 |
+							     bestVTUi << 8 |
+							     unk);
+}
+
+u8 *
+nouveau_dp_bios_data(struct drm_device *dev, struct dcb_entry *dcb, u8 **entry)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvbios *bios = &dev_priv->vbios;
+	struct bit_entry d;
+	u8 *table;
+	int i;
+
+	if (bit_table(dev, 'd', &d)) {
+		NV_ERROR(dev, "BIT 'd' table not found\n");
+		return NULL;
+	}
+
+	if (d.version != 1) {
+		NV_ERROR(dev, "BIT 'd' table version %d unknown\n", d.version);
+		return NULL;
+	}
+
+	table = ROMPTR(bios, d.data[0]);
+	if (!table) {
+		NV_ERROR(dev, "displayport table pointer invalid\n");
+		return NULL;
+	}
+
+	switch (table[0]) {
+	case 0x20:
+	case 0x21:
+	case 0x30:
+		break;
+	default:
+		NV_ERROR(dev, "displayport table 0x%02x unknown\n", table[0]);
+		return NULL;
+	}
+
+	for (i = 0; i < table[3]; i++) {
+		*entry = ROMPTR(bios, table[table[1] + (i * table[2])]);
+		if (*entry && bios_encoder_match(dcb, ROM32((*entry)[0])))
+			return table;
+	}
+
+	NV_ERROR(dev, "displayport encoder table not found\n");
+	return NULL;
+}
+
+/******************************************************************************
+ * link training
+ *****************************************************************************/
+struct dp_state {
+	struct dcb_entry *dcb;
+	u8 *table;
+	u8 *entry;
+	int auxch;
+	int crtc;
+	int or;
+	int link;
+	u8 *dpcd;
+	int link_nr;
+	u32 link_bw;
+	u8  stat[6];
+	u8  conf[4];
+};
+
+static void
+dp_set_link_config(struct drm_device *dev, struct dp_state *dp)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	int or = dp->or, link = dp->link;
+	u8 *entry, sink[2];
+	u32 dp_ctrl;
+	u16 script;
+
+	NV_DEBUG_KMS(dev, "%d lanes at %d KB/s\n", dp->link_nr, dp->link_bw);
+
+	/* set selected link rate on source */
+	switch (dp->link_bw) {
+	case 270000:
+		nv_mask(dev, 0x614300 + (or * 0x800), 0x000c0000, 0x00040000);
+		sink[0] = DP_LINK_BW_2_7;
+		break;
+	default:
+		nv_mask(dev, 0x614300 + (or * 0x800), 0x000c0000, 0x00000000);
+		sink[0] = DP_LINK_BW_1_62;
+		break;
+	}
+
+	/* offset +0x0a of each dp encoder table entry is a pointer to another
+	 * table, that has (among other things) pointers to more scripts that
+	 * need to be executed, this time depending on link speed.
+	 */
+	entry = ROMPTR(&dev_priv->vbios, dp->entry[10]);
+	if (entry) {
+		if (dp->table[0] < 0x30) {
+			while (dp->link_bw < (ROM16(entry[0]) * 10))
+				entry += 4;
+			script = ROM16(entry[2]);
+		} else {
+			while (dp->link_bw < (entry[0] * 27000))
+				entry += 3;
+			script = ROM16(entry[1]);
+		}
+
+		nouveau_bios_run_init_table(dev, script, dp->dcb, dp->crtc);
+	}
+
+	/* configure lane count on the source */
+	dp_ctrl = ((1 << dp->link_nr) - 1) << 16;
+	sink[1] = dp->link_nr;
+	if (dp->dpcd[2] & DP_ENHANCED_FRAME_CAP) {
+		dp_ctrl |= 0x00004000;
+		sink[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+	}
+
+	nv_mask(dev, NV50_SOR_DP_CTRL(or, link), 0x001f4000, dp_ctrl);
+
+	/* inform the sink of the new configuration */
+	auxch_tx(dev, dp->auxch, 8, DP_LINK_BW_SET, sink, 2);
+}
+
+static void
+dp_set_training_pattern(struct drm_device *dev, struct dp_state *dp, u8 tp)
+{
+	u8 sink_tp;
+
+	NV_DEBUG_KMS(dev, "training pattern %d\n", tp);
+
+	nv_mask(dev, NV50_SOR_DP_CTRL(dp->or, dp->link), 0x0f000000, tp << 24);
+
+	auxch_tx(dev, dp->auxch, 9, DP_TRAINING_PATTERN_SET, &sink_tp, 1);
+	sink_tp &= ~DP_TRAINING_PATTERN_MASK;
+	sink_tp |= tp;
+	auxch_tx(dev, dp->auxch, 8, DP_TRAINING_PATTERN_SET, &sink_tp, 1);
+}
+
+static const u8 nv50_lane_map[] = { 16, 8, 0, 24 };
+static const u8 nvaf_lane_map[] = { 24, 16, 8, 0 };
+
+static int
+dp_link_train_commit(struct drm_device *dev, struct dp_state *dp)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	u32 mask = 0, drv = 0, pre = 0, unk = 0;
+	const u8 *shifts;
+	int link = dp->link;
+	int or = dp->or;
+	int i;
+
+	if (dev_priv->chipset != 0xaf)
+		shifts = nv50_lane_map;
+	else
+		shifts = nvaf_lane_map;
+
+	for (i = 0; i < dp->link_nr; i++) {
+		u8 *conf = dp->entry + dp->table[4];
+		u8 lane = (dp->stat[4 + (i >> 1)] >> ((i & 1) * 4)) & 0xf;
+		u8 lpre = (lane & 0x0c) >> 2;
+		u8 lvsw = (lane & 0x03) >> 0;
+
+		mask |= 0xff << shifts[i];
+		unk |= 1 << (shifts[i] >> 3);
+
+		dp->conf[i] = (lpre << 3) | lvsw;
+		if (lvsw == DP_TRAIN_VOLTAGE_SWING_1200)
+			dp->conf[i] |= DP_TRAIN_MAX_SWING_REACHED;
+		if (lpre == DP_TRAIN_PRE_EMPHASIS_9_5)
+			dp->conf[i] |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
+
+		NV_DEBUG_KMS(dev, "config lane %d %02x\n", i, dp->conf[i]);
+
+		if (dp->table[0] < 0x30) {
+			u8 *last = conf + (dp->entry[4] * dp->table[5]);
+			while (lvsw != conf[0] || lpre != conf[1]) {
+				conf += dp->table[5];
+				if (conf >= last)
+					return -EINVAL;
+			}
+
+			conf += 2;
+		} else {
+			/* no lookup table anymore, set entries for each
+			 * combination of voltage swing and pre-emphasis
+			 * level allowed by the DP spec.
+			 */
+			switch (lvsw) {
+			case 0: lpre += 0; break;
+			case 1: lpre += 4; break;
+			case 2: lpre += 7; break;
+			case 3: lpre += 9; break;
+			}
+
+			conf = conf + (lpre * dp->table[5]);
+			conf++;
+		}
+
+		drv |= conf[0] << shifts[i];
+		pre |= conf[1] << shifts[i];
+		unk  = (unk & ~0x0000ff00) | (conf[2] << 8);
+	}
+
+	nv_mask(dev, NV50_SOR_DP_UNK118(or, link), mask, drv);
+	nv_mask(dev, NV50_SOR_DP_UNK120(or, link), mask, pre);
+	nv_mask(dev, NV50_SOR_DP_UNK130(or, link), 0x0000ff0f, unk);
+
+	return auxch_tx(dev, dp->auxch, 8, DP_TRAINING_LANE0_SET, dp->conf, 4);
 }
 
 static int
-nouveau_dp_link_train_set(struct drm_encoder *encoder, int pattern)
+dp_link_train_update(struct drm_device *dev, struct dp_state *dp, u32 delay)
 {
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	uint32_t tmp;
-	uint8_t cmd;
-	int or = nv_encoder->or, link = !(nv_encoder->dcb->sorconf.link & 1);
 	int ret;
 
-	tmp  = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
-	tmp &= ~NV50_SOR_DP_CTRL_TRAINING_PATTERN;
-	tmp |= (pattern << 24);
-	nv_wr32(dev, NV50_SOR_DP_CTRL(or, link), tmp);
+	udelay(delay);
 
-	ret = auxch_rd(encoder, DP_TRAINING_PATTERN_SET, &cmd, 1);
+	ret = auxch_tx(dev, dp->auxch, 9, DP_LANE0_1_STATUS, dp->stat, 6);
 	if (ret)
 		return ret;
-	cmd &= ~DP_TRAINING_PATTERN_MASK;
-	cmd |= (pattern & DP_TRAINING_PATTERN_MASK);
-	return auxch_wr(encoder, DP_TRAINING_PATTERN_SET, &cmd, 1);
+
+	NV_DEBUG_KMS(dev, "status %02x %02x %02x %02x %02x %02x\n",
+		     dp->stat[0], dp->stat[1], dp->stat[2], dp->stat[3],
+		     dp->stat[4], dp->stat[5]);
+	return 0;
 }
 
 static int
-nouveau_dp_max_voltage_swing(struct drm_encoder *encoder)
+dp_link_train_cr(struct drm_device *dev, struct dp_state *dp)
 {
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct bit_displayport_encoder_table_entry *dpse;
-	struct bit_displayport_encoder_table *dpe;
-	int i, dpe_headerlen, max_vs = 0;
+	bool cr_done = false, abort = false;
+	int voltage = dp->conf[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
+	int tries = 0, i;
 
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe)
-		return false;
-	dpse = (void *)((char *)dpe + dpe_headerlen);
+	dp_set_training_pattern(dev, dp, DP_TRAINING_PATTERN_1);
 
-	for (i = 0; i < dpe_headerlen; i++, dpse++) {
-		if (dpse->vs_level > max_vs)
-			max_vs = dpse->vs_level;
-	}
-
-	return max_vs;
-}
-
-static int
-nouveau_dp_max_pre_emphasis(struct drm_encoder *encoder, int vs)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct bit_displayport_encoder_table_entry *dpse;
-	struct bit_displayport_encoder_table *dpe;
-	int i, dpe_headerlen, max_pre = 0;
-
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe)
-		return false;
-	dpse = (void *)((char *)dpe + dpe_headerlen);
-
-	for (i = 0; i < dpe_headerlen; i++, dpse++) {
-		if (dpse->vs_level != vs)
-			continue;
-
-		if (dpse->pre_level > max_pre)
-			max_pre = dpse->pre_level;
-	}
-
-	return max_pre;
-}
-
-static bool
-nouveau_dp_link_train_adjust(struct drm_encoder *encoder, uint8_t *config)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct bit_displayport_encoder_table *dpe;
-	int ret, i, dpe_headerlen, vs = 0, pre = 0;
-	uint8_t request[2];
-
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe)
-		return false;
-
-	ret = auxch_rd(encoder, DP_ADJUST_REQUEST_LANE0_1, request, 2);
-	if (ret)
-		return false;
-
-	NV_DEBUG_KMS(dev, "\t\tadjust 0x%02x 0x%02x\n", request[0], request[1]);
-
-	/* Keep all lanes at the same level.. */
-	for (i = 0; i < nv_encoder->dp.link_nr; i++) {
-		int lane_req = (request[i >> 1] >> ((i & 1) << 2)) & 0xf;
-		int lane_vs = lane_req & 3;
-		int lane_pre = (lane_req >> 2) & 3;
-
-		if (lane_vs > vs)
-			vs = lane_vs;
-		if (lane_pre > pre)
-			pre = lane_pre;
-	}
-
-	if (vs >= nouveau_dp_max_voltage_swing(encoder)) {
-		vs  = nouveau_dp_max_voltage_swing(encoder);
-		vs |= 4;
-	}
-
-	if (pre >= nouveau_dp_max_pre_emphasis(encoder, vs & 3)) {
-		pre  = nouveau_dp_max_pre_emphasis(encoder, vs & 3);
-		pre |= 4;
-	}
-
-	/* Update the configuration for all lanes.. */
-	for (i = 0; i < nv_encoder->dp.link_nr; i++)
-		config[i] = (pre << 3) | vs;
-
-	return true;
-}
-
-static bool
-nouveau_dp_link_train_commit(struct drm_encoder *encoder, uint8_t *config)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct bit_displayport_encoder_table_entry *dpse;
-	struct bit_displayport_encoder_table *dpe;
-	int or = nv_encoder->or, link = !(nv_encoder->dcb->sorconf.link & 1);
-	int dpe_headerlen, ret, i;
-
-	NV_DEBUG_KMS(dev, "\t\tconfig 0x%02x 0x%02x 0x%02x 0x%02x\n",
-		 config[0], config[1], config[2], config[3]);
-
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe)
-		return false;
-	dpse = (void *)((char *)dpe + dpe_headerlen);
-
-	for (i = 0; i < dpe->record_nr; i++, dpse++) {
-		if (dpse->vs_level == (config[0] & 3) &&
-		    dpse->pre_level == ((config[0] >> 3) & 3))
+	do {
+		if (dp_link_train_commit(dev, dp) ||
+		    dp_link_train_update(dev, dp, 100))
 			break;
-	}
-	BUG_ON(i == dpe->record_nr);
 
-	for (i = 0; i < nv_encoder->dp.link_nr; i++) {
-		const int shift[4] = { 16, 8, 0, 24 };
-		uint32_t mask = 0xff << shift[i];
-		uint32_t reg0, reg1, reg2;
+		cr_done = true;
+		for (i = 0; i < dp->link_nr; i++) {
+			u8 lane = (dp->stat[i >> 1] >> ((i & 1) * 4)) & 0xf;
+			if (!(lane & DP_LANE_CR_DONE)) {
+				cr_done = false;
+				if (dp->conf[i] & DP_TRAIN_MAX_SWING_REACHED)
+					abort = true;
+				break;
+			}
+		}
 
-		reg0  = nv_rd32(dev, NV50_SOR_DP_UNK118(or, link)) & ~mask;
-		reg0 |= (dpse->reg0 << shift[i]);
-		reg1  = nv_rd32(dev, NV50_SOR_DP_UNK120(or, link)) & ~mask;
-		reg1 |= (dpse->reg1 << shift[i]);
-		reg2  = nv_rd32(dev, NV50_SOR_DP_UNK130(or, link)) & 0xffff00ff;
-		reg2 |= (dpse->reg2 << 8);
-		nv_wr32(dev, NV50_SOR_DP_UNK118(or, link), reg0);
-		nv_wr32(dev, NV50_SOR_DP_UNK120(or, link), reg1);
-		nv_wr32(dev, NV50_SOR_DP_UNK130(or, link), reg2);
-	}
+		if ((dp->conf[0] & DP_TRAIN_VOLTAGE_SWING_MASK) != voltage) {
+			voltage = dp->conf[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
+			tries = 0;
+		}
+	} while (!cr_done && !abort && ++tries < 5);
 
-	ret = auxch_wr(encoder, DP_TRAINING_LANE0_SET, config, 4);
-	if (ret)
-		return false;
+	return cr_done ? 0 : -1;
+}
 
-	return true;
+static int
+dp_link_train_eq(struct drm_device *dev, struct dp_state *dp)
+{
+	bool eq_done, cr_done = true;
+	int tries = 0, i;
+
+	dp_set_training_pattern(dev, dp, DP_TRAINING_PATTERN_2);
+
+	do {
+		if (dp_link_train_update(dev, dp, 400))
+			break;
+
+		eq_done = !!(dp->stat[2] & DP_INTERLANE_ALIGN_DONE);
+		for (i = 0; i < dp->link_nr && eq_done; i++) {
+			u8 lane = (dp->stat[i >> 1] >> ((i & 1) * 4)) & 0xf;
+			if (!(lane & DP_LANE_CR_DONE))
+				cr_done = false;
+			if (!(lane & DP_LANE_CHANNEL_EQ_DONE) ||
+			    !(lane & DP_LANE_SYMBOL_LOCKED))
+				eq_done = false;
+		}
+
+		if (dp_link_train_commit(dev, dp))
+			break;
+	} while (!eq_done && cr_done && ++tries <= 5);
+
+	return eq_done ? 0 : -1;
 }
 
 bool
-nouveau_dp_link_train(struct drm_encoder *encoder)
+nouveau_dp_link_train(struct drm_encoder *encoder, u32 datarate)
 {
-	struct drm_device *dev = encoder->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct drm_nouveau_private *dev_priv = encoder->dev->dev_private;
 	struct nouveau_gpio_engine *pgpio = &dev_priv->engine.gpio;
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_connector *nv_connector;
-	struct bit_displayport_encoder_table *dpe;
-	int dpe_headerlen;
-	uint8_t config[4], status[3];
-	bool cr_done, cr_max_vs, eq_done, hpd_state;
-	int ret = 0, i, tries, voltage;
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector =
+		nouveau_encoder_connector_get(nv_encoder);
+	struct drm_device *dev = encoder->dev;
+	struct nouveau_i2c_chan *auxch;
+	const u32 bw_list[] = { 270000, 162000, 0 };
+	const u32 *link_bw = bw_list;
+	struct dp_state dp;
 
-	NV_DEBUG_KMS(dev, "link training!!\n");
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!nv_connector)
+	auxch = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
+	if (!auxch)
 		return false;
 
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe) {
-		NV_ERROR(dev, "SOR-%d: no DP encoder table!\n", nv_encoder->or);
-		return false;
-	}
+	dp.table = nouveau_dp_bios_data(dev, nv_encoder->dcb, &dp.entry);
+	if (!dp.table)
+		return -EINVAL;
 
-	/* disable hotplug detect, this flips around on some panels during
-	 * link training.
+	dp.dcb = nv_encoder->dcb;
+	dp.crtc = nv_crtc->index;
+	dp.auxch = auxch->rd;
+	dp.or = nv_encoder->or;
+	dp.link = !(nv_encoder->dcb->sorconf.link & 1);
+	dp.dpcd = nv_encoder->dp.dpcd;
+
+	/* some sinks toggle hotplug in response to some of the actions
+	 * we take during link training (DP_SET_POWER is one), we need
+	 * to ignore them for the moment to avoid races.
 	 */
-	hpd_state = pgpio->irq_enable(dev, nv_connector->dcb->gpio_tag, false);
+	pgpio->irq_enable(dev, nv_connector->dcb->gpio_tag, false);
 
-	if (dpe->script0) {
-		NV_DEBUG_KMS(dev, "SOR-%d: running DP script 0\n", nv_encoder->or);
-		nouveau_bios_run_init_table(dev, le16_to_cpu(dpe->script0),
-					    nv_encoder->dcb);
+	/* enable down-spreading, if possible */
+	if (dp.table[1] >= 16) {
+		u16 script = ROM16(dp.entry[14]);
+		if (nv_encoder->dp.dpcd[3] & 1)
+			script = ROM16(dp.entry[12]);
+
+		nouveau_bios_run_init_table(dev, script, dp.dcb, dp.crtc);
 	}
 
-train:
-	cr_done = eq_done = false;
+	/* execute pre-train script from vbios */
+	nouveau_bios_run_init_table(dev, ROM16(dp.entry[6]), dp.dcb, dp.crtc);
 
-	/* set link configuration */
-	NV_DEBUG_KMS(dev, "\tbegin train: bw %d, lanes %d\n",
-		 nv_encoder->dp.link_bw, nv_encoder->dp.link_nr);
+	/* start off at highest link rate supported by encoder and display */
+	while (*link_bw > nv_encoder->dp.link_bw)
+		link_bw++;
 
-	ret = nouveau_dp_link_bw_set(encoder, nv_encoder->dp.link_bw);
-	if (ret)
-		return false;
+	while (link_bw[0]) {
+		/* find minimum required lane count at this link rate */
+		dp.link_nr = nv_encoder->dp.link_nr;
+		while ((dp.link_nr >> 1) * link_bw[0] > datarate)
+			dp.link_nr >>= 1;
 
-	config[0] = nv_encoder->dp.link_nr;
-	if (nv_encoder->dp.dpcd_version >= 0x11 &&
-	    nv_encoder->dp.enhanced_frame)
-		config[0] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+		/* drop link rate to minimum with this lane count */
+		while ((link_bw[1] * dp.link_nr) > datarate)
+			link_bw++;
+		dp.link_bw = link_bw[0];
 
-	ret = nouveau_dp_lane_count_set(encoder, config[0]);
-	if (ret)
-		return false;
+		/* program selected link configuration */
+		dp_set_link_config(dev, &dp);
 
-	/* clock recovery */
-	NV_DEBUG_KMS(dev, "\tbegin cr\n");
-	ret = nouveau_dp_link_train_set(encoder, DP_TRAINING_PATTERN_1);
-	if (ret)
-		goto stop;
-
-	tries = 0;
-	voltage = -1;
-	memset(config, 0x00, sizeof(config));
-	for (;;) {
-		if (!nouveau_dp_link_train_commit(encoder, config))
+		/* attempt to train the link at this configuration */
+		memset(dp.stat, 0x00, sizeof(dp.stat));
+		if (!dp_link_train_cr(dev, &dp) &&
+		    !dp_link_train_eq(dev, &dp))
 			break;
 
-		udelay(100);
-
-		ret = auxch_rd(encoder, DP_LANE0_1_STATUS, status, 2);
-		if (ret)
-			break;
-		NV_DEBUG_KMS(dev, "\t\tstatus: 0x%02x 0x%02x\n",
-			 status[0], status[1]);
-
-		cr_done = true;
-		cr_max_vs = false;
-		for (i = 0; i < nv_encoder->dp.link_nr; i++) {
-			int lane = (status[i >> 1] >> ((i & 1) * 4)) & 0xf;
-
-			if (!(lane & DP_LANE_CR_DONE)) {
-				cr_done = false;
-				if (config[i] & DP_TRAIN_MAX_PRE_EMPHASIS_REACHED)
-					cr_max_vs = true;
-				break;
-			}
-		}
-
-		if ((config[0] & DP_TRAIN_VOLTAGE_SWING_MASK) != voltage) {
-			voltage = config[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
-			tries = 0;
-		}
-
-		if (cr_done || cr_max_vs || (++tries == 5))
-			break;
-
-		if (!nouveau_dp_link_train_adjust(encoder, config))
-			break;
+		/* retry at lower rate */
+		link_bw++;
 	}
 
-	if (!cr_done)
-		goto stop;
+	/* finish link training */
+	dp_set_training_pattern(dev, &dp, DP_TRAINING_PATTERN_DISABLE);
 
-	/* channel equalisation */
-	NV_DEBUG_KMS(dev, "\tbegin eq\n");
-	ret = nouveau_dp_link_train_set(encoder, DP_TRAINING_PATTERN_2);
-	if (ret)
-		goto stop;
-
-	for (tries = 0; tries <= 5; tries++) {
-		udelay(400);
-
-		ret = auxch_rd(encoder, DP_LANE0_1_STATUS, status, 3);
-		if (ret)
-			break;
-		NV_DEBUG_KMS(dev, "\t\tstatus: 0x%02x 0x%02x\n",
-			 status[0], status[1]);
-
-		eq_done = true;
-		if (!(status[2] & DP_INTERLANE_ALIGN_DONE))
-			eq_done = false;
-
-		for (i = 0; eq_done && i < nv_encoder->dp.link_nr; i++) {
-			int lane = (status[i >> 1] >> ((i & 1) * 4)) & 0xf;
-
-			if (!(lane & DP_LANE_CR_DONE)) {
-				cr_done = false;
-				break;
-			}
-
-			if (!(lane & DP_LANE_CHANNEL_EQ_DONE) ||
-			    !(lane & DP_LANE_SYMBOL_LOCKED)) {
-				eq_done = false;
-				break;
-			}
-		}
-
-		if (eq_done || !cr_done)
-			break;
-
-		if (!nouveau_dp_link_train_adjust(encoder, config) ||
-		    !nouveau_dp_link_train_commit(encoder, config))
-			break;
-	}
-
-stop:
-	/* end link training */
-	ret = nouveau_dp_link_train_set(encoder, DP_TRAINING_PATTERN_DISABLE);
-	if (ret)
-		return false;
-
-	/* retry at a lower setting, if possible */
-	if (!ret && !(eq_done && cr_done)) {
-		NV_DEBUG_KMS(dev, "\twe failed\n");
-		if (nv_encoder->dp.link_bw != DP_LINK_BW_1_62) {
-			NV_DEBUG_KMS(dev, "retry link training at low rate\n");
-			nv_encoder->dp.link_bw = DP_LINK_BW_1_62;
-			goto train;
-		}
-	}
-
-	if (dpe->script1) {
-		NV_DEBUG_KMS(dev, "SOR-%d: running DP script 1\n", nv_encoder->or);
-		nouveau_bios_run_init_table(dev, le16_to_cpu(dpe->script1),
-					    nv_encoder->dcb);
-	}
+	/* execute post-train script from vbios */
+	nouveau_bios_run_init_table(dev, ROM16(dp.entry[8]), dp.dcb, dp.crtc);
 
 	/* re-enable hotplug detect */
-	pgpio->irq_enable(dev, nv_connector->dcb->gpio_tag, hpd_state);
-
-	return eq_done;
+	pgpio->irq_enable(dev, nv_connector->dcb->gpio_tag, true);
+	return true;
 }
 
 bool
@@ -447,31 +648,34 @@
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
-	uint8_t dpcd[4];
+	struct nouveau_i2c_chan *auxch;
+	u8 *dpcd = nv_encoder->dp.dpcd;
 	int ret;
 
-	ret = auxch_rd(encoder, 0x0000, dpcd, 4);
+	auxch = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
+	if (!auxch)
+		return false;
+
+	ret = auxch_tx(dev, auxch->rd, 9, DP_DPCD_REV, dpcd, 8);
 	if (ret)
 		return false;
 
-	NV_DEBUG_KMS(dev, "encoder: link_bw %d, link_nr %d\n"
-		      "display: link_bw %d, link_nr %d version 0x%02x\n",
-		 nv_encoder->dcb->dpconf.link_bw,
-		 nv_encoder->dcb->dpconf.link_nr,
-		 dpcd[1], dpcd[2] & 0x0f, dpcd[0]);
-
-	nv_encoder->dp.dpcd_version = dpcd[0];
-
-	nv_encoder->dp.link_bw = dpcd[1];
-	if (nv_encoder->dp.link_bw != DP_LINK_BW_1_62 &&
-	    !nv_encoder->dcb->dpconf.link_bw)
-		nv_encoder->dp.link_bw = DP_LINK_BW_1_62;
-
+	nv_encoder->dp.link_bw = 27000 * dpcd[1];
 	nv_encoder->dp.link_nr = dpcd[2] & DP_MAX_LANE_COUNT_MASK;
-	if (nv_encoder->dp.link_nr > nv_encoder->dcb->dpconf.link_nr)
-		nv_encoder->dp.link_nr = nv_encoder->dcb->dpconf.link_nr;
 
-	nv_encoder->dp.enhanced_frame = (dpcd[2] & DP_ENHANCED_FRAME_CAP);
+	NV_DEBUG_KMS(dev, "display: %dx%d dpcd 0x%02x\n",
+		     nv_encoder->dp.link_nr, nv_encoder->dp.link_bw, dpcd[0]);
+	NV_DEBUG_KMS(dev, "encoder: %dx%d\n",
+		     nv_encoder->dcb->dpconf.link_nr,
+		     nv_encoder->dcb->dpconf.link_bw);
+
+	if (nv_encoder->dcb->dpconf.link_nr < nv_encoder->dp.link_nr)
+		nv_encoder->dp.link_nr = nv_encoder->dcb->dpconf.link_nr;
+	if (nv_encoder->dcb->dpconf.link_bw < nv_encoder->dp.link_bw)
+		nv_encoder->dp.link_bw = nv_encoder->dcb->dpconf.link_bw;
+
+	NV_DEBUG_KMS(dev, "maximum: %dx%d\n",
+		     nv_encoder->dp.link_nr, nv_encoder->dp.link_bw);
 
 	return true;
 }
@@ -480,105 +684,13 @@
 nouveau_dp_auxch(struct nouveau_i2c_chan *auxch, int cmd, int addr,
 		 uint8_t *data, int data_nr)
 {
-	struct drm_device *dev = auxch->dev;
-	uint32_t tmp, ctrl, stat = 0, data32[4] = {};
-	int ret = 0, i, index = auxch->rd;
-
-	NV_DEBUG_KMS(dev, "ch %d cmd %d addr 0x%x len %d\n", index, cmd, addr, data_nr);
-
-	tmp = nv_rd32(dev, NV50_AUXCH_CTRL(auxch->rd));
-	nv_wr32(dev, NV50_AUXCH_CTRL(auxch->rd), tmp | 0x00100000);
-	tmp = nv_rd32(dev, NV50_AUXCH_CTRL(auxch->rd));
-	if (!(tmp & 0x01000000)) {
-		NV_ERROR(dev, "expected bit 24 == 1, got 0x%08x\n", tmp);
-		ret = -EIO;
-		goto out;
-	}
-
-	for (i = 0; i < 3; i++) {
-		tmp = nv_rd32(dev, NV50_AUXCH_STAT(auxch->rd));
-		if (tmp & NV50_AUXCH_STAT_STATE_READY)
-			break;
-		udelay(100);
-	}
-
-	if (i == 3) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	if (!(cmd & 1)) {
-		memcpy(data32, data, data_nr);
-		for (i = 0; i < 4; i++) {
-			NV_DEBUG_KMS(dev, "wr %d: 0x%08x\n", i, data32[i]);
-			nv_wr32(dev, NV50_AUXCH_DATA_OUT(index, i), data32[i]);
-		}
-	}
-
-	nv_wr32(dev, NV50_AUXCH_ADDR(index), addr);
-	ctrl  = nv_rd32(dev, NV50_AUXCH_CTRL(index));
-	ctrl &= ~(NV50_AUXCH_CTRL_CMD | NV50_AUXCH_CTRL_LEN);
-	ctrl |= (cmd << NV50_AUXCH_CTRL_CMD_SHIFT);
-	ctrl |= ((data_nr - 1) << NV50_AUXCH_CTRL_LEN_SHIFT);
-
-	for (i = 0; i < 16; i++) {
-		nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl | 0x80000000);
-		nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl);
-		nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl | 0x00010000);
-		if (!nv_wait(dev, NV50_AUXCH_CTRL(index),
-			     0x00010000, 0x00000000)) {
-			NV_ERROR(dev, "expected bit 16 == 0, got 0x%08x\n",
-				 nv_rd32(dev, NV50_AUXCH_CTRL(index)));
-			ret = -EBUSY;
-			goto out;
-		}
-
-		udelay(400);
-
-		stat = nv_rd32(dev, NV50_AUXCH_STAT(index));
-		if ((stat & NV50_AUXCH_STAT_REPLY_AUX) !=
-			    NV50_AUXCH_STAT_REPLY_AUX_DEFER)
-			break;
-	}
-
-	if (i == 16) {
-		NV_ERROR(dev, "auxch DEFER too many times, bailing\n");
-		ret = -EREMOTEIO;
-		goto out;
-	}
-
-	if (cmd & 1) {
-		if ((stat & NV50_AUXCH_STAT_COUNT) != data_nr) {
-			ret = -EREMOTEIO;
-			goto out;
-		}
-
-		for (i = 0; i < 4; i++) {
-			data32[i] = nv_rd32(dev, NV50_AUXCH_DATA_IN(index, i));
-			NV_DEBUG_KMS(dev, "rd %d: 0x%08x\n", i, data32[i]);
-		}
-		memcpy(data, data32, data_nr);
-	}
-
-out:
-	tmp = nv_rd32(dev, NV50_AUXCH_CTRL(auxch->rd));
-	nv_wr32(dev, NV50_AUXCH_CTRL(auxch->rd), tmp & ~0x00100000);
-	tmp = nv_rd32(dev, NV50_AUXCH_CTRL(auxch->rd));
-	if (tmp & 0x01000000) {
-		NV_ERROR(dev, "expected bit 24 == 0, got 0x%08x\n", tmp);
-		ret = -EIO;
-	}
-
-	udelay(400);
-
-	return ret ? ret : (stat & NV50_AUXCH_STAT_REPLY);
+	return auxch_tx(auxch->dev, auxch->rd, cmd, addr, data, data_nr);
 }
 
 static int
 nouveau_dp_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 {
 	struct nouveau_i2c_chan *auxch = (struct nouveau_i2c_chan *)adap;
-	struct drm_device *dev = auxch->dev;
 	struct i2c_msg *msg = msgs;
 	int ret, mcnt = num;
 
@@ -602,19 +714,6 @@
 			if (ret < 0)
 				return ret;
 
-			switch (ret & NV50_AUXCH_STAT_REPLY_I2C) {
-			case NV50_AUXCH_STAT_REPLY_I2C_ACK:
-				break;
-			case NV50_AUXCH_STAT_REPLY_I2C_NACK:
-				return -EREMOTEIO;
-			case NV50_AUXCH_STAT_REPLY_I2C_DEFER:
-				udelay(100);
-				continue;
-			default:
-				NV_ERROR(dev, "bad auxch reply: 0x%08x\n", ret);
-				return -EREMOTEIO;
-			}
-
 			ptr += cnt;
 			remaining -= cnt;
 		}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index b30ddd8..c1e01f3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -41,7 +41,7 @@
 module_param_named(agpmode, nouveau_agpmode, int, 0400);
 
 MODULE_PARM_DESC(modeset, "Enable kernel modesetting");
-static int nouveau_modeset = -1; /* kms */
+int nouveau_modeset = -1;
 module_param_named(modeset, nouveau_modeset, int, 0400);
 
 MODULE_PARM_DESC(vbios, "Override default VBIOS location");
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index d7d51de..29837da 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -414,12 +414,13 @@
 };
 
 struct nouveau_pm_voltage_level {
-	u8 voltage;
-	u8 vid;
+	u32 voltage; /* microvolts */
+	u8  vid;
 };
 
 struct nouveau_pm_voltage {
 	bool supported;
+	u8 version;
 	u8 vid_mask;
 
 	struct nouveau_pm_voltage_level *level;
@@ -428,17 +429,48 @@
 
 struct nouveau_pm_memtiming {
 	int id;
-	u32 reg_100220;
-	u32 reg_100224;
-	u32 reg_100228;
-	u32 reg_10022c;
-	u32 reg_100230;
-	u32 reg_100234;
-	u32 reg_100238;
-	u32 reg_10023c;
-	u32 reg_100240;
+	u32 reg_0; /* 0x10f290 on Fermi, 0x100220 for older */
+	u32 reg_1;
+	u32 reg_2;
+	u32 reg_3;
+	u32 reg_4;
+	u32 reg_5;
+	u32 reg_6;
+	u32 reg_7;
+	u32 reg_8;
+	/* To be written to 0x1002c0 */
+	u8 CL;
+	u8 WR;
 };
 
+struct nouveau_pm_tbl_header{
+	u8 version;
+	u8 header_len;
+	u8 entry_cnt;
+	u8 entry_len;
+};
+
+struct nouveau_pm_tbl_entry{
+	u8 tWR;
+	u8 tUNK_1;
+	u8 tCL;
+	u8 tRP;		/* Byte 3 */
+	u8 empty_4;
+	u8 tRAS;	/* Byte 5 */
+	u8 empty_6;
+	u8 tRFC;	/* Byte 7 */
+	u8 empty_8;
+	u8 tRC;		/* Byte 9 */
+	u8 tUNK_10, tUNK_11, tUNK_12, tUNK_13, tUNK_14;
+	u8 empty_15,empty_16,empty_17;
+	u8 tUNK_18, tUNK_19, tUNK_20, tUNK_21;
+};
+
+/* nouveau_mem.c */
+void nv30_mem_timing_entry(struct drm_device *dev, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, uint8_t magic_number,
+							struct nouveau_pm_memtiming *timing);
+
 #define NOUVEAU_PM_MAX_LEVEL 8
 struct nouveau_pm_level {
 	struct device_attribute dev_attr;
@@ -448,11 +480,19 @@
 	u32 core;
 	u32 memory;
 	u32 shader;
-	u32 unk05;
-	u32 unk0a;
+	u32 rop;
+	u32 copy;
+	u32 daemon;
+	u32 vdec;
+	u32 unk05;	/* nv50:nva3, roughly.. */
+	u32 unka0;	/* nva3:nvc0 */
+	u32 hub01;	/* nvc0- */
+	u32 hub06;	/* nvc0- */
+	u32 hub07;	/* nvc0- */
 
-	u8 voltage;
-	u8 fanspeed;
+	u32 volt_min; /* microvolts */
+	u32 volt_max;
+	u8  fanspeed;
 
 	u16 memscript;
 	struct nouveau_pm_memtiming *timing;
@@ -496,6 +536,11 @@
 	void *(*clock_pre)(struct drm_device *, struct nouveau_pm_level *,
 			   u32 id, int khz);
 	void (*clock_set)(struct drm_device *, void *);
+
+	int  (*clocks_get)(struct drm_device *, struct nouveau_pm_level *);
+	void *(*clocks_pre)(struct drm_device *, struct nouveau_pm_level *);
+	void (*clocks_set)(struct drm_device *, void *);
+
 	int (*voltage_get)(struct drm_device *);
 	int (*voltage_set)(struct drm_device *, int voltage);
 	int (*fanspeed_get)(struct drm_device *);
@@ -504,7 +549,7 @@
 };
 
 struct nouveau_vram_engine {
-	struct nouveau_mm *mm;
+	struct nouveau_mm mm;
 
 	int  (*init)(struct drm_device *);
 	void (*takedown)(struct drm_device *dev);
@@ -623,6 +668,7 @@
 	NV_40      = 0x40,
 	NV_50      = 0x50,
 	NV_C0      = 0xc0,
+	NV_D0      = 0xd0
 };
 
 struct drm_nouveau_private {
@@ -633,8 +679,8 @@
 	enum nouveau_card_type card_type;
 	/* exact chipset, derived from NV_PMC_BOOT_0 */
 	int chipset;
-	int stepping;
 	int flags;
+	u32 crystal;
 
 	void __iomem *mmio;
 
@@ -721,7 +767,6 @@
 	uint64_t vram_size;
 	uint64_t vram_sys_base;
 
-	uint64_t fb_phys;
 	uint64_t fb_available_size;
 	uint64_t fb_mappable_pages;
 	uint64_t fb_aper_free;
@@ -784,6 +829,7 @@
 }
 
 /* nouveau_drv.c */
+extern int nouveau_modeset;
 extern int nouveau_agpmode;
 extern int nouveau_duallink;
 extern int nouveau_uscript_lvds;
@@ -824,6 +870,8 @@
 			    uint32_t reg, uint32_t mask, uint32_t val);
 extern bool nouveau_wait_ne(struct drm_device *, uint64_t timeout,
 			    uint32_t reg, uint32_t mask, uint32_t val);
+extern bool nouveau_wait_cb(struct drm_device *, u64 timeout,
+			    bool (*cond)(void *), void *);
 extern bool nouveau_wait_for_idle(struct drm_device *);
 extern int  nouveau_card_init(struct drm_device *);
 
@@ -1006,15 +1054,15 @@
 
 /* nouveau_backlight.c */
 #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT
-extern int nouveau_backlight_init(struct drm_connector *);
-extern void nouveau_backlight_exit(struct drm_connector *);
+extern int nouveau_backlight_init(struct drm_device *);
+extern void nouveau_backlight_exit(struct drm_device *);
 #else
-static inline int nouveau_backlight_init(struct drm_connector *dev)
+static inline int nouveau_backlight_init(struct drm_device *dev)
 {
 	return 0;
 }
 
-static inline void nouveau_backlight_exit(struct drm_connector *dev) { }
+static inline void nouveau_backlight_exit(struct drm_device *dev) { }
 #endif
 
 /* nouveau_bios.c */
@@ -1022,7 +1070,8 @@
 extern void nouveau_bios_takedown(struct drm_device *dev);
 extern int nouveau_run_vbios_init(struct drm_device *);
 extern void nouveau_bios_run_init_table(struct drm_device *, uint16_t table,
-					struct dcb_entry *);
+					struct dcb_entry *, int crtc);
+extern void nouveau_bios_init_exec(struct drm_device *, uint16_t table);
 extern struct dcb_gpio_entry *nouveau_bios_gpio_entry(struct drm_device *,
 						      enum dcb_gpio_tag);
 extern struct dcb_connector_table_entry *
@@ -1030,11 +1079,8 @@
 extern u32 get_pll_register(struct drm_device *, enum pll_types);
 extern int get_pll_limits(struct drm_device *, uint32_t limit_match,
 			  struct pll_lims *);
-extern int nouveau_bios_run_display_table(struct drm_device *,
-					  struct dcb_entry *,
-					  uint32_t script, int pxclk);
-extern void *nouveau_bios_dp_table(struct drm_device *, struct dcb_entry *,
-				   int *length);
+extern int nouveau_bios_run_display_table(struct drm_device *, u16 id, int clk,
+					  struct dcb_entry *, int crtc);
 extern bool nouveau_bios_fp_mode(struct drm_device *, struct drm_display_mode *);
 extern uint8_t *nouveau_bios_embedded_edid(struct drm_device *);
 extern int nouveau_bios_parse_lvds_table(struct drm_device *, int pxclk,
@@ -1043,6 +1089,7 @@
 			  int head, int pxclk);
 extern int call_lvds_script(struct drm_device *, struct dcb_entry *, int head,
 			    enum LVDS_script, int pxclk);
+bool bios_encoder_match(struct dcb_entry *, u32 hash);
 
 /* nouveau_ttm.c */
 int nouveau_ttm_global_init(struct drm_nouveau_private *);
@@ -1053,7 +1100,9 @@
 int nouveau_dp_auxch(struct nouveau_i2c_chan *auxch, int cmd, int addr,
 		     uint8_t *data, int data_nr);
 bool nouveau_dp_detect(struct drm_encoder *);
-bool nouveau_dp_link_train(struct drm_encoder *);
+bool nouveau_dp_link_train(struct drm_encoder *, u32 datarate);
+void nouveau_dp_tu_update(struct drm_device *, int, int, u32, u32);
+u8 *nouveau_dp_bios_data(struct drm_device *, struct dcb_entry *, u8 **);
 
 /* nv04_fb.c */
 extern int  nv04_fb_init(struct drm_device *);
@@ -1179,8 +1228,8 @@
 /* nvc0_copy.c */
 extern int  nvc0_copy_create(struct drm_device *dev, int engine);
 
-/* nv40_mpeg.c */
-extern int  nv40_mpeg_create(struct drm_device *dev);
+/* nv31_mpeg.c */
+extern int  nv31_mpeg_create(struct drm_device *dev);
 
 /* nv50_mpeg.c */
 extern int  nv50_mpeg_create(struct drm_device *dev);
@@ -1265,6 +1314,11 @@
 extern int nv04_display_init(struct drm_device *);
 extern void nv04_display_destroy(struct drm_device *);
 
+/* nvd0_display.c */
+extern int nvd0_display_create(struct drm_device *);
+extern int nvd0_display_init(struct drm_device *);
+extern void nvd0_display_destroy(struct drm_device *);
+
 /* nv04_crtc.c */
 extern int nv04_crtc_create(struct drm_device *, int index);
 
@@ -1374,6 +1428,8 @@
 void nv50_gpio_fini(struct drm_device *dev);
 int nv50_gpio_get(struct drm_device *dev, enum dcb_gpio_tag tag);
 int nv50_gpio_set(struct drm_device *dev, enum dcb_gpio_tag tag, int state);
+int nvd0_gpio_get(struct drm_device *dev, enum dcb_gpio_tag tag);
+int nvd0_gpio_set(struct drm_device *dev, enum dcb_gpio_tag tag, int state);
 int  nv50_gpio_irq_register(struct drm_device *, enum dcb_gpio_tag,
 			    void (*)(void *, int), void *);
 void nv50_gpio_irq_unregister(struct drm_device *, enum dcb_gpio_tag,
@@ -1448,6 +1504,8 @@
 	nouveau_wait_eq(dev, 2000000000ULL, (reg), (mask), (val))
 #define nv_wait_ne(dev, reg, mask, val) \
 	nouveau_wait_ne(dev, 2000000000ULL, (reg), (mask), (val))
+#define nv_wait_cb(dev, func, data) \
+	nouveau_wait_cb(dev, 2000000000ULL, (func), (data))
 
 /* PRAMIN access */
 static inline u32 nv_ri32(struct drm_device *dev, unsigned offset)
@@ -1514,6 +1572,7 @@
 	NOUVEAU_REG_DEBUG_RMVIO          = 0x80,
 	NOUVEAU_REG_DEBUG_VGAATTR        = 0x100,
 	NOUVEAU_REG_DEBUG_EVO            = 0x200,
+	NOUVEAU_REG_DEBUG_AUXCH          = 0x400
 };
 
 #define NV_REG_DEBUG(type, dev, fmt, arg...) do { \
diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index ae69b61..e5d6e3f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -49,17 +49,17 @@
 
 	union {
 		struct {
-			int mc_unknown;
-			uint32_t unk0;
-			uint32_t unk1;
-			int dpcd_version;
+			u8  dpcd[8];
 			int link_nr;
 			int link_bw;
-			bool enhanced_frame;
+			u32 datarate;
 		} dp;
 	};
 };
 
+struct nouveau_encoder *
+find_encoder(struct drm_connector *connector, int type);
+
 static inline struct nouveau_encoder *nouveau_encoder(struct drm_encoder *enc)
 {
 	struct drm_encoder_slave *slave = to_encoder_slave(enc);
@@ -83,21 +83,4 @@
 int nv50_sor_create(struct drm_connector *, struct dcb_entry *);
 int nv50_dac_create(struct drm_connector *, struct dcb_entry *);
 
-struct bit_displayport_encoder_table {
-	uint32_t match;
-	uint8_t  record_nr;
-	uint8_t  unknown;
-	uint16_t script0;
-	uint16_t script1;
-	uint16_t unknown_table;
-} __attribute__ ((packed));
-
-struct bit_displayport_encoder_table_entry {
-	uint8_t vs_level;
-	uint8_t pre_level;
-	uint8_t reg0;
-	uint8_t reg1;
-	uint8_t reg2;
-} __attribute__ ((packed));
-
 #endif /* __NOUVEAU_ENCODER_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index c919cfc..81116cf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -519,7 +519,7 @@
 	if (USE_SEMA(dev) && dev_priv->chipset < 0x84) {
 		struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;
 
-		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
+		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY,
 					     mem->start << PAGE_SHIFT,
 					     mem->size, NV_MEM_ACCESS_RW,
 					     NV_MEM_TARGET_VRAM, &obj);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 5f0bc57..322bf62 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -589,7 +589,8 @@
 		}
 
 		spin_lock(&nvbo->bo.bdev->fence_lock);
-		ret = ttm_bo_wait(&nvbo->bo, false, false, false);
+		ret = ttm_bo_wait(&nvbo->bo, false, false, false,
+				  TTM_USAGE_READWRITE);
 		spin_unlock(&nvbo->bo.bdev->fence_lock);
 		if (ret) {
 			NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
@@ -825,7 +826,7 @@
 	nvbo = nouveau_gem_object(gem);
 
 	spin_lock(&nvbo->bo.bdev->fence_lock);
-	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
+	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait, TTM_USAGE_READWRITE);
 	spin_unlock(&nvbo->bo.bdev->fence_lock);
 	drm_gem_object_unreference_unlocked(gem);
 	return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_i2c.c b/drivers/gpu/drm/nouveau/nouveau_i2c.c
index cb389d0..f6a27fa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_i2c.c
+++ b/drivers/gpu/drm/nouveau/nouveau_i2c.c
@@ -107,6 +107,13 @@
 	return !!((nv_rd32(dev, i2c->rd) >> 16) & 8);
 }
 
+static const uint32_t nv50_i2c_port[] = {
+	0x00e138, 0x00e150, 0x00e168, 0x00e180,
+	0x00e254, 0x00e274, 0x00e764, 0x00e780,
+	0x00e79c, 0x00e7b8
+};
+#define NV50_I2C_PORTS ARRAY_SIZE(nv50_i2c_port)
+
 static int
 nv50_i2c_getscl(void *data)
 {
@@ -130,28 +137,32 @@
 nv50_i2c_setscl(void *data, int state)
 {
 	struct nouveau_i2c_chan *i2c = data;
-	struct drm_device *dev = i2c->dev;
 
-	nv_wr32(dev, i2c->wr, 4 | (i2c->data ? 2 : 0) | (state ? 1 : 0));
+	nv_wr32(i2c->dev, i2c->wr, 4 | (i2c->data ? 2 : 0) | (state ? 1 : 0));
 }
 
 static void
 nv50_i2c_setsda(void *data, int state)
 {
 	struct nouveau_i2c_chan *i2c = data;
-	struct drm_device *dev = i2c->dev;
 
-	nv_wr32(dev, i2c->wr,
-			(nv_rd32(dev, i2c->rd) & 1) | 4 | (state ? 2 : 0));
+	nv_mask(i2c->dev, i2c->wr, 0x00000006, 4 | (state ? 2 : 0));
 	i2c->data = state;
 }
 
-static const uint32_t nv50_i2c_port[] = {
-	0x00e138, 0x00e150, 0x00e168, 0x00e180,
-	0x00e254, 0x00e274, 0x00e764, 0x00e780,
-	0x00e79c, 0x00e7b8
-};
-#define NV50_I2C_PORTS ARRAY_SIZE(nv50_i2c_port)
+static int
+nvd0_i2c_getscl(void *data)
+{
+	struct nouveau_i2c_chan *i2c = data;
+	return !!(nv_rd32(i2c->dev, i2c->rd) & 0x10);
+}
+
+static int
+nvd0_i2c_getsda(void *data)
+{
+	struct nouveau_i2c_chan *i2c = data;
+	return !!(nv_rd32(i2c->dev, i2c->rd) & 0x20);
+}
 
 int
 nouveau_i2c_init(struct drm_device *dev, struct dcb_i2c_entry *entry, int index)
@@ -163,7 +174,8 @@
 	if (entry->chan)
 		return -EEXIST;
 
-	if (dev_priv->card_type >= NV_50 && entry->read >= NV50_I2C_PORTS) {
+	if (dev_priv->card_type >= NV_50 &&
+	    dev_priv->card_type <= NV_C0 && entry->read >= NV50_I2C_PORTS) {
 		NV_ERROR(dev, "unknown i2c port %d\n", entry->read);
 		return -EINVAL;
 	}
@@ -192,10 +204,17 @@
 	case 5:
 		i2c->bit.setsda = nv50_i2c_setsda;
 		i2c->bit.setscl = nv50_i2c_setscl;
-		i2c->bit.getsda = nv50_i2c_getsda;
-		i2c->bit.getscl = nv50_i2c_getscl;
-		i2c->rd = nv50_i2c_port[entry->read];
-		i2c->wr = i2c->rd;
+		if (dev_priv->card_type < NV_D0) {
+			i2c->bit.getsda = nv50_i2c_getsda;
+			i2c->bit.getscl = nv50_i2c_getscl;
+			i2c->rd = nv50_i2c_port[entry->read];
+			i2c->wr = i2c->rd;
+		} else {
+			i2c->bit.getsda = nvd0_i2c_getsda;
+			i2c->bit.getscl = nvd0_i2c_getscl;
+			i2c->rd = 0x00d014 + (entry->read * 0x20);
+			i2c->wr = i2c->rd;
+		}
 		break;
 	case 6:
 		i2c->rd = entry->read;
@@ -267,7 +286,10 @@
 			val  = 0xe001;
 		}
 
-		nv_wr32(dev, reg, (nv_rd32(dev, reg) & ~0xf003) | val);
+		/* nfi, but neither auxch or i2c work if it's 1 */
+		nv_mask(dev, reg + 0x0c, 0x00000001, 0x00000000);
+		/* nfi, but switches auxch vs normal i2c */
+		nv_mask(dev, reg + 0x00, 0x0000f003, val);
 	}
 
 	if (!i2c->chan && nouveau_i2c_init(dev, i2c, index))
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index f9ae2fc..36bec48 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -408,8 +408,6 @@
 	if (ret)
 		return ret;
 
-	dev_priv->fb_phys = pci_resource_start(dev->pdev, 1);
-
 	ret = nouveau_ttm_global_init(dev_priv);
 	if (ret)
 		return ret;
@@ -504,35 +502,146 @@
 	return 0;
 }
 
+/* XXX: For now a dummy. More samples required, possibly even a card
+ * Called from nouveau_perf.c */
+void nv30_mem_timing_entry(struct drm_device *dev, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, uint8_t magic_number,
+							struct nouveau_pm_memtiming *timing) {
+
+	NV_DEBUG(dev,"Timing entry format unknown, please contact nouveau developers");
+}
+
+void nv40_mem_timing_entry(struct drm_device *dev, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, uint8_t magic_number,
+							struct nouveau_pm_memtiming *timing) {
+
+	timing->reg_0 = (e->tRC << 24 | e->tRFC << 16 | e->tRAS << 8 | e->tRP);
+
+	/* XXX: I don't trust the -1's and +1's... they must come
+	 *      from somewhere! */
+	timing->reg_1 = (e->tWR + 2 + magic_number) << 24 |
+				  1 << 16 |
+				  (e->tUNK_1 + 2 + magic_number) << 8 |
+				  (e->tCL + 2 - magic_number);
+	timing->reg_2 = (magic_number << 24 | e->tUNK_12 << 16 | e->tUNK_11 << 8 | e->tUNK_10);
+	timing->reg_2 |= 0x20200000;
+
+	NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x\n", timing->id,
+		 timing->reg_0, timing->reg_1,timing->reg_2);
+}
+
+void nv50_mem_timing_entry(struct drm_device *dev, struct bit_entry *P, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, uint8_t magic_number,struct nouveau_pm_memtiming *timing) {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	uint8_t unk18 = 1,
+		unk19 = 1,
+		unk20 = 0,
+		unk21 = 0;
+
+	switch (min(hdr->entry_len, (u8) 22)) {
+	case 22:
+		unk21 = e->tUNK_21;
+	case 21:
+		unk20 = e->tUNK_20;
+	case 20:
+		unk19 = e->tUNK_19;
+	case 19:
+		unk18 = e->tUNK_18;
+		break;
+	}
+
+	timing->reg_0 = (e->tRC << 24 | e->tRFC << 16 | e->tRAS << 8 | e->tRP);
+
+	/* XXX: I don't trust the -1's and +1's... they must come
+	 *      from somewhere! */
+	timing->reg_1 = (e->tWR + unk19 + 1 + magic_number) << 24 |
+				  max(unk18, (u8) 1) << 16 |
+				  (e->tUNK_1 + unk19 + 1 + magic_number) << 8;
+	if (dev_priv->chipset == 0xa8) {
+		timing->reg_1 |= (e->tCL - 1);
+	} else {
+		timing->reg_1 |= (e->tCL + 2 - magic_number);
+	}
+	timing->reg_2 = (e->tUNK_12 << 16 | e->tUNK_11 << 8 | e->tUNK_10);
+
+	timing->reg_5 = (e->tRAS << 24 | e->tRC);
+	timing->reg_5 += max(e->tUNK_10, e->tUNK_11) << 16;
+
+	if (P->version == 1) {
+		timing->reg_2 |= magic_number << 24;
+		timing->reg_3 = (0x14 + e->tCL) << 24 |
+						0x16 << 16 |
+						(e->tCL - 1) << 8 |
+						(e->tCL - 1);
+		timing->reg_4 = (nv_rd32(dev,0x10022c) & 0xffff0000) | e->tUNK_13 << 8  | e->tUNK_13;
+		timing->reg_5 |= (e->tCL + 2) << 8;
+		timing->reg_7 = 0x4000202 | (e->tCL - 1) << 16;
+	} else {
+		timing->reg_2 |= (unk19 - 1) << 24;
+		/* XXX: reg_10022c for recentish cards pretty much unknown*/
+		timing->reg_3 = e->tCL - 1;
+		timing->reg_4 = (unk20 << 24 | unk21 << 16 |
+							e->tUNK_13 << 8  | e->tUNK_13);
+		/* XXX: +6? */
+		timing->reg_5 |= (unk19 + 6) << 8;
+
+		/* XXX: reg_10023c currently unknown
+		 * 10023c seen as 06xxxxxx, 0bxxxxxx or 0fxxxxxx */
+		timing->reg_7 = 0x202;
+	}
+
+	NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x %08x\n", timing->id,
+		 timing->reg_0, timing->reg_1,
+		 timing->reg_2, timing->reg_3);
+	NV_DEBUG(dev, "         230: %08x %08x %08x %08x\n",
+		 timing->reg_4, timing->reg_5,
+		 timing->reg_6, timing->reg_7);
+	NV_DEBUG(dev, "         240: %08x\n", timing->reg_8);
+}
+
+void nvc0_mem_timing_entry(struct drm_device *dev, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, struct nouveau_pm_memtiming *timing) {
+	timing->reg_0 = (e->tRC << 24 | (e->tRFC & 0x7f) << 17 | e->tRAS << 8 | e->tRP);
+	timing->reg_1 = (nv_rd32(dev,0x10f294) & 0xff000000) | (e->tUNK_11&0x0f) << 20 | (e->tUNK_19 << 7) | (e->tCL & 0x0f);
+	timing->reg_2 = (nv_rd32(dev,0x10f298) & 0xff0000ff) | e->tWR << 16 | e->tUNK_1 << 8;
+	timing->reg_3 = e->tUNK_20 << 9 | e->tUNK_13;
+	timing->reg_4 = (nv_rd32(dev,0x10f2a0) & 0xfff000ff) | e->tUNK_12 << 15;
+	NV_DEBUG(dev, "Entry %d: 290: %08x %08x %08x %08x\n", timing->id,
+		 timing->reg_0, timing->reg_1,
+		 timing->reg_2, timing->reg_3);
+	NV_DEBUG(dev, "         2a0: %08x %08x %08x %08x\n",
+		 timing->reg_4, timing->reg_5,
+		 timing->reg_6, timing->reg_7);
+}
+
+/**
+ * Processes the Memory Timing BIOS table, stores generated
+ * register values
+ * @pre init scripts were run, memtiming regs are initialized
+ */
 void
 nouveau_mem_timing_init(struct drm_device *dev)
 {
-	/* cards < NVC0 only */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	struct nouveau_pm_memtimings *memtimings = &pm->memtimings;
 	struct nvbios *bios = &dev_priv->vbios;
 	struct bit_entry P;
-	u8 tUNK_0, tUNK_1, tUNK_2;
-	u8 tRP;		/* Byte 3 */
-	u8 tRAS;	/* Byte 5 */
-	u8 tRFC;	/* Byte 7 */
-	u8 tRC;		/* Byte 9 */
-	u8 tUNK_10, tUNK_11, tUNK_12, tUNK_13, tUNK_14;
-	u8 tUNK_18, tUNK_19, tUNK_20, tUNK_21;
-	u8 magic_number = 0; /* Yeah... sorry*/
-	u8 *mem = NULL, *entry;
-	int i, recordlen, entries;
+	struct nouveau_pm_tbl_header *hdr = NULL;
+	uint8_t magic_number;
+	u8 *entry;
+	int i;
 
 	if (bios->type == NVBIOS_BIT) {
 		if (bit_table(dev, 'P', &P))
 			return;
 
 		if (P.version == 1)
-			mem = ROMPTR(bios, P.data[4]);
+			hdr = (struct nouveau_pm_tbl_header *) ROMPTR(bios, P.data[4]);
 		else
 		if (P.version == 2)
-			mem = ROMPTR(bios, P.data[8]);
+			hdr = (struct nouveau_pm_tbl_header *) ROMPTR(bios, P.data[8]);
 		else {
 			NV_WARN(dev, "unknown mem for BIT P %d\n", P.version);
 		}
@@ -541,150 +650,56 @@
 		return;
 	}
 
-	if (!mem) {
+	if (!hdr) {
 		NV_DEBUG(dev, "memory timing table pointer invalid\n");
 		return;
 	}
 
-	if (mem[0] != 0x10) {
-		NV_WARN(dev, "memory timing table 0x%02x unknown\n", mem[0]);
+	if (hdr->version != 0x10) {
+		NV_WARN(dev, "memory timing table 0x%02x unknown\n", hdr->version);
 		return;
 	}
 
 	/* validate record length */
-	entries   = mem[2];
-	recordlen = mem[3];
-	if (recordlen < 15) {
-		NV_ERROR(dev, "mem timing table length unknown: %d\n", mem[3]);
+	if (hdr->entry_len < 15) {
+		NV_ERROR(dev, "mem timing table length unknown: %d\n", hdr->entry_len);
 		return;
 	}
 
 	/* parse vbios entries into common format */
 	memtimings->timing =
-		kcalloc(entries, sizeof(*memtimings->timing), GFP_KERNEL);
+		kcalloc(hdr->entry_cnt, sizeof(*memtimings->timing), GFP_KERNEL);
 	if (!memtimings->timing)
 		return;
 
 	/* Get "some number" from the timing reg for NV_40 and NV_50
-	 * Used in calculations later */
-	if (dev_priv->card_type >= NV_40 && dev_priv->chipset < 0x98) {
+	 * Used in calculations later... source unknown */
+	magic_number = 0;
+	if (P.version == 1) {
 		magic_number = (nv_rd32(dev, 0x100228) & 0x0f000000) >> 24;
 	}
 
-	entry = mem + mem[1];
-	for (i = 0; i < entries; i++, entry += recordlen) {
+	entry = (u8*) hdr + hdr->header_len;
+	for (i = 0; i < hdr->entry_cnt; i++, entry += hdr->entry_len) {
 		struct nouveau_pm_memtiming *timing = &pm->memtimings.timing[i];
 		if (entry[0] == 0)
 			continue;
 
-		tUNK_18 = 1;
-		tUNK_19 = 1;
-		tUNK_20 = 0;
-		tUNK_21 = 0;
-		switch (min(recordlen, 22)) {
-		case 22:
-			tUNK_21 = entry[21];
-		case 21:
-			tUNK_20 = entry[20];
-		case 20:
-			tUNK_19 = entry[19];
-		case 19:
-			tUNK_18 = entry[18];
-		default:
-			tUNK_0  = entry[0];
-			tUNK_1  = entry[1];
-			tUNK_2  = entry[2];
-			tRP     = entry[3];
-			tRAS    = entry[5];
-			tRFC    = entry[7];
-			tRC     = entry[9];
-			tUNK_10 = entry[10];
-			tUNK_11 = entry[11];
-			tUNK_12 = entry[12];
-			tUNK_13 = entry[13];
-			tUNK_14 = entry[14];
-			break;
-		}
-
-		timing->reg_100220 = (tRC << 24 | tRFC << 16 | tRAS << 8 | tRP);
-
-		/* XXX: I don't trust the -1's and +1's... they must come
-		 *      from somewhere! */
-		timing->reg_100224 = (tUNK_0 + tUNK_19 + 1 + magic_number) << 24 |
-				      max(tUNK_18, (u8) 1) << 16 |
-				      (tUNK_1 + tUNK_19 + 1 + magic_number) << 8;
-		if (dev_priv->chipset == 0xa8) {
-			timing->reg_100224 |= (tUNK_2 - 1);
-		} else {
-			timing->reg_100224 |= (tUNK_2 + 2 - magic_number);
-		}
-
-		timing->reg_100228 = (tUNK_12 << 16 | tUNK_11 << 8 | tUNK_10);
-		if (dev_priv->chipset >= 0xa3 && dev_priv->chipset < 0xaa)
-			timing->reg_100228 |= (tUNK_19 - 1) << 24;
-		else
-			timing->reg_100228 |= magic_number << 24;
-
-		if (dev_priv->card_type == NV_40) {
-			/* NV40: don't know what the rest of the regs are..
-			 * And don't need to know either */
-			timing->reg_100228 |= 0x20200000;
-		} else if (dev_priv->card_type >= NV_50) {
-			if (dev_priv->chipset < 0x98 ||
-			    (dev_priv->chipset == 0x98 &&
-			     dev_priv->stepping <= 0xa1)) {
-				timing->reg_10022c = (0x14 + tUNK_2) << 24 |
-						     0x16 << 16 |
-						     (tUNK_2 - 1) << 8 |
-						     (tUNK_2 - 1);
-			} else {
-				/* XXX: reg_10022c for recentish cards */
-				timing->reg_10022c = tUNK_2 - 1;
-			}
-
-			timing->reg_100230 = (tUNK_20 << 24 | tUNK_21 << 16 |
-						  tUNK_13 << 8  | tUNK_13);
-
-			timing->reg_100234 = (tRAS << 24 | tRC);
-			timing->reg_100234 += max(tUNK_10, tUNK_11) << 16;
-
-			if (dev_priv->chipset < 0x98 ||
-			    (dev_priv->chipset == 0x98 &&
-			     dev_priv->stepping <= 0xa1)) {
-				timing->reg_100234 |= (tUNK_2 + 2) << 8;
-			} else {
-				/* XXX: +6? */
-				timing->reg_100234 |= (tUNK_19 + 6) << 8;
-			}
-
-			/* XXX; reg_100238
-			 * reg_100238: 0x00?????? */
-			timing->reg_10023c = 0x202;
-			if (dev_priv->chipset < 0x98 ||
-			    (dev_priv->chipset == 0x98 &&
-			     dev_priv->stepping <= 0xa1)) {
-				timing->reg_10023c |= 0x4000000 | (tUNK_2 - 1) << 16;
-			} else {
-				/* XXX: reg_10023c
-				 * currently unknown
-				 * 10023c seen as 06xxxxxx, 0bxxxxxx or 0fxxxxxx */
-			}
-
-			/* XXX: reg_100240? */
-		}
 		timing->id = i;
+		timing->WR = entry[0];
+		timing->CL = entry[2];
 
-		NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x %08x\n", i,
-			 timing->reg_100220, timing->reg_100224,
-			 timing->reg_100228, timing->reg_10022c);
-		NV_DEBUG(dev, "         230: %08x %08x %08x %08x\n",
-			 timing->reg_100230, timing->reg_100234,
-			 timing->reg_100238, timing->reg_10023c);
-		NV_DEBUG(dev, "         240: %08x\n", timing->reg_100240);
+		if(dev_priv->card_type <= NV_40) {
+			nv40_mem_timing_entry(dev,hdr,(struct nouveau_pm_tbl_entry*) entry,magic_number,&pm->memtimings.timing[i]);
+		} else if(dev_priv->card_type == NV_50){
+			nv50_mem_timing_entry(dev,&P,hdr,(struct nouveau_pm_tbl_entry*) entry,magic_number,&pm->memtimings.timing[i]);
+		} else if(dev_priv->card_type == NV_C0) {
+			nvc0_mem_timing_entry(dev,hdr,(struct nouveau_pm_tbl_entry*) entry,&pm->memtimings.timing[i]);
+		}
 	}
 
-	memtimings->nr_timing = entries;
-	memtimings->supported = (dev_priv->chipset <= 0x98);
+	memtimings->nr_timing = hdr->entry_cnt;
+	memtimings->supported = P.version == 1;
 }
 
 void
@@ -693,7 +708,10 @@
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_memtimings *mem = &dev_priv->engine.pm.memtimings;
 
-	kfree(mem->timing);
+	if(mem->timing) {
+		kfree(mem->timing);
+		mem->timing = NULL;
+	}
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.c b/drivers/gpu/drm/nouveau/nouveau_mm.c
index 1640dec..b29ffb3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.c
@@ -27,7 +27,7 @@
 #include "nouveau_mm.h"
 
 static inline void
-region_put(struct nouveau_mm *rmm, struct nouveau_mm_node *a)
+region_put(struct nouveau_mm *mm, struct nouveau_mm_node *a)
 {
 	list_del(&a->nl_entry);
 	list_del(&a->fl_entry);
@@ -35,7 +35,7 @@
 }
 
 static struct nouveau_mm_node *
-region_split(struct nouveau_mm *rmm, struct nouveau_mm_node *a, u32 size)
+region_split(struct nouveau_mm *mm, struct nouveau_mm_node *a, u32 size)
 {
 	struct nouveau_mm_node *b;
 
@@ -57,33 +57,33 @@
 	return b;
 }
 
-#define node(root, dir) ((root)->nl_entry.dir == &rmm->nodes) ? NULL : \
+#define node(root, dir) ((root)->nl_entry.dir == &mm->nodes) ? NULL : \
 	list_entry((root)->nl_entry.dir, struct nouveau_mm_node, nl_entry)
 
 void
-nouveau_mm_put(struct nouveau_mm *rmm, struct nouveau_mm_node *this)
+nouveau_mm_put(struct nouveau_mm *mm, struct nouveau_mm_node *this)
 {
 	struct nouveau_mm_node *prev = node(this, prev);
 	struct nouveau_mm_node *next = node(this, next);
 
-	list_add(&this->fl_entry, &rmm->free);
+	list_add(&this->fl_entry, &mm->free);
 	this->type = 0;
 
 	if (prev && prev->type == 0) {
 		prev->length += this->length;
-		region_put(rmm, this);
+		region_put(mm, this);
 		this = prev;
 	}
 
 	if (next && next->type == 0) {
 		next->offset  = this->offset;
 		next->length += this->length;
-		region_put(rmm, this);
+		region_put(mm, this);
 	}
 }
 
 int
-nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc,
+nouveau_mm_get(struct nouveau_mm *mm, int type, u32 size, u32 size_nc,
 	       u32 align, struct nouveau_mm_node **pnode)
 {
 	struct nouveau_mm_node *prev, *this, *next;
@@ -92,17 +92,17 @@
 	u32 splitoff;
 	u32 s, e;
 
-	list_for_each_entry(this, &rmm->free, fl_entry) {
+	list_for_each_entry(this, &mm->free, fl_entry) {
 		e = this->offset + this->length;
 		s = this->offset;
 
 		prev = node(this, prev);
 		if (prev && prev->type != type)
-			s = roundup(s, rmm->block_size);
+			s = roundup(s, mm->block_size);
 
 		next = node(this, next);
 		if (next && next->type != type)
-			e = rounddown(e, rmm->block_size);
+			e = rounddown(e, mm->block_size);
 
 		s  = (s + align_mask) & ~align_mask;
 		e &= ~align_mask;
@@ -110,10 +110,10 @@
 			continue;
 
 		splitoff = s - this->offset;
-		if (splitoff && !region_split(rmm, this, splitoff))
+		if (splitoff && !region_split(mm, this, splitoff))
 			return -ENOMEM;
 
-		this = region_split(rmm, this, min(size, e - s));
+		this = region_split(mm, this, min(size, e - s));
 		if (!this)
 			return -ENOMEM;
 
@@ -127,52 +127,49 @@
 }
 
 int
-nouveau_mm_init(struct nouveau_mm **prmm, u32 offset, u32 length, u32 block)
+nouveau_mm_init(struct nouveau_mm *mm, u32 offset, u32 length, u32 block)
 {
-	struct nouveau_mm *rmm;
-	struct nouveau_mm_node *heap;
+	struct nouveau_mm_node *node;
 
-	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
-	if (!heap)
-		return -ENOMEM;
-	heap->offset = roundup(offset, block);
-	heap->length = rounddown(offset + length, block) - heap->offset;
-
-	rmm = kzalloc(sizeof(*rmm), GFP_KERNEL);
-	if (!rmm) {
-		kfree(heap);
-		return -ENOMEM;
+	if (block) {
+		mutex_init(&mm->mutex);
+		INIT_LIST_HEAD(&mm->nodes);
+		INIT_LIST_HEAD(&mm->free);
+		mm->block_size = block;
+		mm->heap_nodes = 0;
 	}
-	rmm->block_size = block;
-	mutex_init(&rmm->mutex);
-	INIT_LIST_HEAD(&rmm->nodes);
-	INIT_LIST_HEAD(&rmm->free);
-	list_add(&heap->nl_entry, &rmm->nodes);
-	list_add(&heap->fl_entry, &rmm->free);
 
-	*prmm = rmm;
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+	node->offset = roundup(offset, mm->block_size);
+	node->length = rounddown(offset + length, mm->block_size) - node->offset;
+
+	list_add_tail(&node->nl_entry, &mm->nodes);
+	list_add_tail(&node->fl_entry, &mm->free);
+	mm->heap_nodes++;
 	return 0;
 }
 
 int
-nouveau_mm_fini(struct nouveau_mm **prmm)
+nouveau_mm_fini(struct nouveau_mm *mm)
 {
-	struct nouveau_mm *rmm = *prmm;
 	struct nouveau_mm_node *node, *heap =
-		list_first_entry(&rmm->nodes, struct nouveau_mm_node, nl_entry);
+		list_first_entry(&mm->nodes, struct nouveau_mm_node, nl_entry);
+	int nodes = 0;
 
-	if (!list_is_singular(&rmm->nodes)) {
-		printk(KERN_ERR "nouveau_mm not empty at destroy time!\n");
-		list_for_each_entry(node, &rmm->nodes, nl_entry) {
-			printk(KERN_ERR "0x%02x: 0x%08x 0x%08x\n",
-			       node->type, node->offset, node->length);
+	list_for_each_entry(node, &mm->nodes, nl_entry) {
+		if (nodes++ == mm->heap_nodes) {
+			printk(KERN_ERR "nouveau_mm in use at destroy time!\n");
+			list_for_each_entry(node, &mm->nodes, nl_entry) {
+				printk(KERN_ERR "0x%02x: 0x%08x 0x%08x\n",
+				       node->type, node->offset, node->length);
+			}
+			WARN_ON(1);
+			return -EBUSY;
 		}
-		WARN_ON(1);
-		return -EBUSY;
 	}
 
 	kfree(heap);
-	kfree(rmm);
-	*prmm = NULL;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.h b/drivers/gpu/drm/nouveau/nouveau_mm.h
index b9c016d..57a600c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.h
@@ -42,10 +42,11 @@
 	struct mutex mutex;
 
 	u32 block_size;
+	int heap_nodes;
 };
 
-int  nouveau_mm_init(struct nouveau_mm **, u32 offset, u32 length, u32 block);
-int  nouveau_mm_fini(struct nouveau_mm **);
+int  nouveau_mm_init(struct nouveau_mm *, u32 offset, u32 length, u32 block);
+int  nouveau_mm_fini(struct nouveau_mm *);
 int  nouveau_mm_pre(struct nouveau_mm *);
 int  nouveau_mm_get(struct nouveau_mm *, int type, u32 size, u32 size_nc,
 		    u32 align, struct nouveau_mm_node **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index 159b7c4..02222c5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -693,6 +693,7 @@
 static int
 nvc0_gpuobj_channel_init(struct nouveau_channel *chan, struct nouveau_vm *vm)
 {
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct drm_device *dev = chan->dev;
 	struct nouveau_gpuobj *pgd = NULL;
 	struct nouveau_vm_pgd *vpgd;
@@ -722,6 +723,9 @@
 	nv_wo32(chan->ramin, 0x020c, 0x000000ff);
 
 	/* map display semaphore buffers into channel's vm */
+	if (dev_priv->card_type >= NV_D0)
+		return 0;
+
 	for (i = 0; i < 2; i++) {
 		struct nv50_display_crtc *dispc = &nv50_display(dev)->crtc[i];
 
@@ -746,7 +750,7 @@
 	int ret, i;
 
 	NV_DEBUG(dev, "ch%d vram=0x%08x tt=0x%08x\n", chan->id, vram_h, tt_h);
-	if (dev_priv->card_type == NV_C0)
+	if (dev_priv->card_type >= NV_C0)
 		return nvc0_gpuobj_channel_init(chan, vm);
 
 	/* Allocate a chunk of memory for per-channel object storage */
@@ -793,7 +797,7 @@
 			return ret;
 
 		/* dma objects for display sync channel semaphore blocks */
-		for (i = 0; i < 2; i++) {
+		for (i = 0; i < dev->mode_config.num_crtc; i++) {
 			struct nouveau_gpuobj *sem = NULL;
 			struct nv50_display_crtc *dispc =
 				&nv50_display(dev)->crtc[i];
@@ -875,18 +879,18 @@
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
 
-	if (dev_priv->card_type >= NV_50) {
+	if (dev_priv->card_type >= NV_50 && dev_priv->card_type <= NV_C0) {
 		struct nv50_display *disp = nv50_display(dev);
 
-		for (i = 0; i < 2; i++) {
+		for (i = 0; i < dev->mode_config.num_crtc; i++) {
 			struct nv50_display_crtc *dispc = &disp->crtc[i];
 			nouveau_bo_vma_del(dispc->sem.bo, &chan->dispc_vma[i]);
 		}
-
-		nouveau_vm_ref(NULL, &chan->vm, chan->vm_pd);
-		nouveau_gpuobj_ref(NULL, &chan->vm_pd);
 	}
 
+	nouveau_vm_ref(NULL, &chan->vm, chan->vm_pd);
+	nouveau_gpuobj_ref(NULL, &chan->vm_pd);
+
 	if (drm_mm_initialized(&chan->ramin_heap))
 		drm_mm_takedown(&chan->ramin_heap);
 	nouveau_gpuobj_ref(NULL, &chan->ramin);
diff --git a/drivers/gpu/drm/nouveau/nouveau_perf.c b/drivers/gpu/drm/nouveau/nouveau_perf.c
index ef9dec0..9f178aa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_perf.c
+++ b/drivers/gpu/drm/nouveau/nouveau_perf.c
@@ -127,13 +127,57 @@
 
 	entry += ramcfg * recordlen;
 	if (entry[1] >= pm->memtimings.nr_timing) {
-		NV_WARN(dev, "timingset %d does not exist\n", entry[1]);
+		if (entry[1] != 0xff)
+			NV_WARN(dev, "timingset %d does not exist\n", entry[1]);
 		return NULL;
 	}
 
 	return &pm->memtimings.timing[entry[1]];
 }
 
+static void
+nouveau_perf_voltage(struct drm_device *dev, struct bit_entry *P,
+		     struct nouveau_pm_level *perflvl)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvbios *bios = &dev_priv->vbios;
+	u8 *vmap;
+	int id;
+
+	id = perflvl->volt_min;
+	perflvl->volt_min = 0;
+
+	/* boards using voltage table version <0x40 store the voltage
+	 * level directly in the perflvl entry as a multiple of 10mV
+	 */
+	if (dev_priv->engine.pm.voltage.version < 0x40) {
+		perflvl->volt_min = id * 10000;
+		perflvl->volt_max = perflvl->volt_min;
+		return;
+	}
+
+	/* on newer ones, the perflvl stores an index into yet another
+	 * vbios table containing a min/max voltage value for the perflvl
+	 */
+	if (P->version != 2 || P->length < 34) {
+		NV_DEBUG(dev, "where's our volt map table ptr? %d %d\n",
+			 P->version, P->length);
+		return;
+	}
+
+	vmap = ROMPTR(bios, P->data[32]);
+	if (!vmap) {
+		NV_DEBUG(dev, "volt map table pointer invalid\n");
+		return;
+	}
+
+	if (id < vmap[3]) {
+		vmap += vmap[1] + (vmap[2] * id);
+		perflvl->volt_min = ROM32(vmap[0]);
+		perflvl->volt_max = ROM32(vmap[4]);
+	}
+}
+
 void
 nouveau_perf_init(struct drm_device *dev)
 {
@@ -141,6 +185,8 @@
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	struct nvbios *bios = &dev_priv->vbios;
 	struct bit_entry P;
+	struct nouveau_pm_memtimings *memtimings = &pm->memtimings;
+	struct nouveau_pm_tbl_header mt_hdr;
 	u8 version, headerlen, recordlen, entries;
 	u8 *perf, *entry;
 	int vid, i;
@@ -188,6 +234,22 @@
 	}
 
 	entry = perf + headerlen;
+
+	/* For version 0x15, initialize memtiming table */
+	if(version == 0x15) {
+		memtimings->timing =
+				kcalloc(entries, sizeof(*memtimings->timing), GFP_KERNEL);
+		if(!memtimings) {
+			NV_WARN(dev,"Could not allocate memtiming table\n");
+			return;
+		}
+
+		mt_hdr.entry_cnt = entries;
+		mt_hdr.entry_len = 14;
+		mt_hdr.version = version;
+		mt_hdr.header_len = 4;
+	}
+
 	for (i = 0; i < entries; i++) {
 		struct nouveau_pm_level *perflvl = &pm->perflvl[pm->nr_perflvl];
 
@@ -203,7 +265,8 @@
 		case 0x13:
 		case 0x15:
 			perflvl->fanspeed = entry[55];
-			perflvl->voltage = (recordlen > 56) ? entry[56] : 0;
+			if (recordlen > 56)
+				perflvl->volt_min = entry[56];
 			perflvl->core = ROM32(entry[1]) * 10;
 			perflvl->memory = ROM32(entry[5]) * 20;
 			break;
@@ -211,9 +274,10 @@
 		case 0x23:
 		case 0x24:
 			perflvl->fanspeed = entry[4];
-			perflvl->voltage = entry[5];
-			perflvl->core = ROM16(entry[6]) * 1000;
-
+			perflvl->volt_min = entry[5];
+			perflvl->shader = ROM16(entry[6]) * 1000;
+			perflvl->core = perflvl->shader;
+			perflvl->core += (signed char)entry[8] * 1000;
 			if (dev_priv->chipset == 0x49 ||
 			    dev_priv->chipset == 0x4b)
 				perflvl->memory = ROM16(entry[11]) * 1000;
@@ -223,7 +287,7 @@
 			break;
 		case 0x25:
 			perflvl->fanspeed = entry[4];
-			perflvl->voltage = entry[5];
+			perflvl->volt_min = entry[5];
 			perflvl->core = ROM16(entry[6]) * 1000;
 			perflvl->shader = ROM16(entry[10]) * 1000;
 			perflvl->memory = ROM16(entry[12]) * 1000;
@@ -232,7 +296,7 @@
 			perflvl->memscript = ROM16(entry[2]);
 		case 0x35:
 			perflvl->fanspeed = entry[6];
-			perflvl->voltage = entry[7];
+			perflvl->volt_min = entry[7];
 			perflvl->core = ROM16(entry[8]) * 1000;
 			perflvl->shader = ROM16(entry[10]) * 1000;
 			perflvl->memory = ROM16(entry[12]) * 1000;
@@ -240,30 +304,34 @@
 			perflvl->unk05 = ROM16(entry[16]) * 1000;
 			break;
 		case 0x40:
-#define subent(n) entry[perf[2] + ((n) * perf[3])]
+#define subent(n) (ROM16(entry[perf[2] + ((n) * perf[3])]) & 0xfff) * 1000
 			perflvl->fanspeed = 0; /*XXX*/
-			perflvl->voltage = entry[2];
+			perflvl->volt_min = entry[2];
 			if (dev_priv->card_type == NV_50) {
-				perflvl->core = ROM16(subent(0)) & 0xfff;
-				perflvl->shader = ROM16(subent(1)) & 0xfff;
-				perflvl->memory = ROM16(subent(2)) & 0xfff;
+				perflvl->core   = subent(0);
+				perflvl->shader = subent(1);
+				perflvl->memory = subent(2);
+				perflvl->vdec   = subent(3);
+				perflvl->unka0  = subent(4);
 			} else {
-				perflvl->shader = ROM16(subent(3)) & 0xfff;
+				perflvl->hub06  = subent(0);
+				perflvl->hub01  = subent(1);
+				perflvl->copy   = subent(2);
+				perflvl->shader = subent(3);
+				perflvl->rop    = subent(4);
+				perflvl->memory = subent(5);
+				perflvl->vdec   = subent(6);
+				perflvl->daemon = subent(10);
+				perflvl->hub07  = subent(11);
 				perflvl->core   = perflvl->shader / 2;
-				perflvl->unk0a  = ROM16(subent(4)) & 0xfff;
-				perflvl->memory = ROM16(subent(5)) & 0xfff;
 			}
-
-			perflvl->core *= 1000;
-			perflvl->shader *= 1000;
-			perflvl->memory *= 1000;
-			perflvl->unk0a *= 1000;
 			break;
 		}
 
 		/* make sure vid is valid */
-		if (pm->voltage.supported && perflvl->voltage) {
-			vid = nouveau_volt_vid_lookup(dev, perflvl->voltage);
+		nouveau_perf_voltage(dev, &P, perflvl);
+		if (pm->voltage.supported && perflvl->volt_min) {
+			vid = nouveau_volt_vid_lookup(dev, perflvl->volt_min);
 			if (vid < 0) {
 				NV_DEBUG(dev, "drop perflvl %d, bad vid\n", i);
 				entry += recordlen;
@@ -272,7 +340,11 @@
 		}
 
 		/* get the corresponding memory timings */
-		if (version > 0x15) {
+		if (version == 0x15) {
+			memtimings->timing[i].id = i;
+			nv30_mem_timing_entry(dev,&mt_hdr,(struct nouveau_pm_tbl_entry*) &entry[41],0,&memtimings->timing[i]);
+			perflvl->timing = &memtimings->timing[i];
+		} else if (version > 0x15) {
 			/* last 3 args are for < 0x40, ignored for >= 0x40 */
 			perflvl->timing =
 				nouveau_perf_timing(dev, &P,
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c
index da8d994..a539fd2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.c
@@ -64,18 +64,26 @@
 	if (perflvl == pm->cur)
 		return 0;
 
-	if (pm->voltage.supported && pm->voltage_set && perflvl->voltage) {
-		ret = pm->voltage_set(dev, perflvl->voltage);
+	if (pm->voltage.supported && pm->voltage_set && perflvl->volt_min) {
+		ret = pm->voltage_set(dev, perflvl->volt_min);
 		if (ret) {
 			NV_ERROR(dev, "voltage_set %d failed: %d\n",
-				 perflvl->voltage, ret);
+				 perflvl->volt_min, ret);
 		}
 	}
 
-	nouveau_pm_clock_set(dev, perflvl, PLL_CORE, perflvl->core);
-	nouveau_pm_clock_set(dev, perflvl, PLL_SHADER, perflvl->shader);
-	nouveau_pm_clock_set(dev, perflvl, PLL_MEMORY, perflvl->memory);
-	nouveau_pm_clock_set(dev, perflvl, PLL_UNK05, perflvl->unk05);
+	if (pm->clocks_pre) {
+		void *state = pm->clocks_pre(dev, perflvl);
+		if (IS_ERR(state))
+			return PTR_ERR(state);
+		pm->clocks_set(dev, state);
+	} else
+	if (pm->clock_set) {
+		nouveau_pm_clock_set(dev, perflvl, PLL_CORE, perflvl->core);
+		nouveau_pm_clock_set(dev, perflvl, PLL_SHADER, perflvl->shader);
+		nouveau_pm_clock_set(dev, perflvl, PLL_MEMORY, perflvl->memory);
+		nouveau_pm_clock_set(dev, perflvl, PLL_UNK05, perflvl->unk05);
+	}
 
 	pm->cur = perflvl;
 	return 0;
@@ -92,9 +100,6 @@
 	if (nouveau_perflvl_wr != 7777)
 		return -EPERM;
 
-	if (!pm->clock_set)
-		return -EINVAL;
-
 	if (!strncmp(profile, "boot", 4))
 		perflvl = &pm->boot;
 	else {
@@ -123,31 +128,37 @@
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	int ret;
 
-	if (!pm->clock_get)
-		return -EINVAL;
-
 	memset(perflvl, 0, sizeof(*perflvl));
 
-	ret = pm->clock_get(dev, PLL_CORE);
-	if (ret > 0)
-		perflvl->core = ret;
+	if (pm->clocks_get) {
+		ret = pm->clocks_get(dev, perflvl);
+		if (ret)
+			return ret;
+	} else
+	if (pm->clock_get) {
+		ret = pm->clock_get(dev, PLL_CORE);
+		if (ret > 0)
+			perflvl->core = ret;
 
-	ret = pm->clock_get(dev, PLL_MEMORY);
-	if (ret > 0)
-		perflvl->memory = ret;
+		ret = pm->clock_get(dev, PLL_MEMORY);
+		if (ret > 0)
+			perflvl->memory = ret;
 
-	ret = pm->clock_get(dev, PLL_SHADER);
-	if (ret > 0)
-		perflvl->shader = ret;
+		ret = pm->clock_get(dev, PLL_SHADER);
+		if (ret > 0)
+			perflvl->shader = ret;
 
-	ret = pm->clock_get(dev, PLL_UNK05);
-	if (ret > 0)
-		perflvl->unk05 = ret;
+		ret = pm->clock_get(dev, PLL_UNK05);
+		if (ret > 0)
+			perflvl->unk05 = ret;
+	}
 
 	if (pm->voltage.supported && pm->voltage_get) {
 		ret = pm->voltage_get(dev);
-		if (ret > 0)
-			perflvl->voltage = ret;
+		if (ret > 0) {
+			perflvl->volt_min = ret;
+			perflvl->volt_max = ret;
+		}
 	}
 
 	return 0;
@@ -156,7 +167,7 @@
 static void
 nouveau_pm_perflvl_info(struct nouveau_pm_level *perflvl, char *ptr, int len)
 {
-	char c[16], s[16], v[16], f[16], t[16];
+	char c[16], s[16], v[32], f[16], t[16], m[16];
 
 	c[0] = '\0';
 	if (perflvl->core)
@@ -166,9 +177,19 @@
 	if (perflvl->shader)
 		snprintf(s, sizeof(s), " shader %dMHz", perflvl->shader / 1000);
 
+	m[0] = '\0';
+	if (perflvl->memory)
+		snprintf(m, sizeof(m), " memory %dMHz", perflvl->memory / 1000);
+
 	v[0] = '\0';
-	if (perflvl->voltage)
-		snprintf(v, sizeof(v), " voltage %dmV", perflvl->voltage * 10);
+	if (perflvl->volt_min && perflvl->volt_min != perflvl->volt_max) {
+		snprintf(v, sizeof(v), " voltage %dmV-%dmV",
+			 perflvl->volt_min / 1000, perflvl->volt_max / 1000);
+	} else
+	if (perflvl->volt_min) {
+		snprintf(v, sizeof(v), " voltage %dmV",
+			 perflvl->volt_min / 1000);
+	}
 
 	f[0] = '\0';
 	if (perflvl->fanspeed)
@@ -178,8 +199,7 @@
 	if (perflvl->timing)
 		snprintf(t, sizeof(t), " timing %d", perflvl->timing->id);
 
-	snprintf(ptr, len, "memory %dMHz%s%s%s%s%s\n", perflvl->memory / 1000,
-		 c, s, v, f, t);
+	snprintf(ptr, len, "%s%s%s%s%s%s\n", c, s, m, t, v, f);
 }
 
 static ssize_t
@@ -190,7 +210,7 @@
 	char *ptr = buf;
 	int len = PAGE_SIZE;
 
-	snprintf(ptr, len, "%d: ", perflvl->id);
+	snprintf(ptr, len, "%d:", perflvl->id);
 	ptr += strlen(buf);
 	len -= strlen(buf);
 
@@ -211,9 +231,9 @@
 	if (!pm->cur)
 		snprintf(ptr, len, "setting: boot\n");
 	else if (pm->cur == &pm->boot)
-		snprintf(ptr, len, "setting: boot\nc: ");
+		snprintf(ptr, len, "setting: boot\nc:");
 	else
-		snprintf(ptr, len, "setting: static %d\nc: ", pm->cur->id);
+		snprintf(ptr, len, "setting: static %d\nc:", pm->cur->id);
 	ptr += strlen(buf);
 	len -= strlen(buf);
 
@@ -292,7 +312,7 @@
 	}
 }
 
-#ifdef CONFIG_HWMON
+#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 static ssize_t
 nouveau_hwmon_show_temp(struct device *d, struct device_attribute *a, char *buf)
 {
@@ -409,7 +429,7 @@
 static int
 nouveau_hwmon_init(struct drm_device *dev)
 {
-#ifdef CONFIG_HWMON
+#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	struct device *hwmon_dev;
@@ -442,7 +462,7 @@
 static void
 nouveau_hwmon_fini(struct drm_device *dev)
 {
-#ifdef CONFIG_HWMON
+#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 
@@ -488,7 +508,7 @@
 	NV_INFO(dev, "%d available performance level(s)\n", pm->nr_perflvl);
 	for (i = 0; i < pm->nr_perflvl; i++) {
 		nouveau_pm_perflvl_info(&pm->perflvl[i], info, sizeof(info));
-		NV_INFO(dev, "%d: %s", pm->perflvl[i].id, info);
+		NV_INFO(dev, "%d:%s", pm->perflvl[i].id, info);
 	}
 
 	/* determine current ("boot") performance level */
@@ -498,7 +518,7 @@
 		pm->cur = &pm->boot;
 
 		nouveau_pm_perflvl_info(&pm->boot, info, sizeof(info));
-		NV_INFO(dev, "c: %s", info);
+		NV_INFO(dev, "c:%s", info);
 	}
 
 	/* switch performance levels now if requested */
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.h b/drivers/gpu/drm/nouveau/nouveau_pm.h
index 4a9838dd..8ac02cd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.h
@@ -52,6 +52,11 @@
 			u32 id, int khz);
 void nv04_pm_clock_set(struct drm_device *, void *);
 
+/* nv40_pm.c */
+int nv40_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *);
+void *nv40_pm_clocks_pre(struct drm_device *, struct nouveau_pm_level *);
+void nv40_pm_clocks_set(struct drm_device *, void *);
+
 /* nv50_pm.c */
 int nv50_pm_clock_get(struct drm_device *, u32 id);
 void *nv50_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *,
@@ -59,10 +64,12 @@
 void nv50_pm_clock_set(struct drm_device *, void *);
 
 /* nva3_pm.c */
-int nva3_pm_clock_get(struct drm_device *, u32 id);
-void *nva3_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *,
-			u32 id, int khz);
-void nva3_pm_clock_set(struct drm_device *, void *);
+int nva3_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *);
+void *nva3_pm_clocks_pre(struct drm_device *, struct nouveau_pm_level *);
+void nva3_pm_clocks_set(struct drm_device *, void *);
+
+/* nvc0_pm.c */
+int nvc0_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *);
 
 /* nouveau_temp.c */
 void nouveau_temp_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index f18cdfc..43a96b9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -826,9 +826,12 @@
 #define NV50_PDISPLAY_SOR_DPMS_STATE_ACTIVE                          0x00030000
 #define NV50_PDISPLAY_SOR_DPMS_STATE_BLANKED                         0x00080000
 #define NV50_PDISPLAY_SOR_DPMS_STATE_WAIT                            0x10000000
-#define NV50_PDISPLAY_SOR_BACKLIGHT                                  0x0061c084
-#define NV50_PDISPLAY_SOR_BACKLIGHT_ENABLE                           0x80000000
-#define NV50_PDISPLAY_SOR_BACKLIGHT_LEVEL                            0x00000fff
+#define NV50_PDISP_SOR_PWM_DIV(i)                     (0x0061c080 + (i) * 0x800)
+#define NV50_PDISP_SOR_PWM_CTL(i)                     (0x0061c084 + (i) * 0x800)
+#define NV50_PDISP_SOR_PWM_CTL_NEW                                   0x80000000
+#define NVA3_PDISP_SOR_PWM_CTL_UNK                                   0x40000000
+#define NV50_PDISP_SOR_PWM_CTL_VAL                                   0x000007ff
+#define NVA3_PDISP_SOR_PWM_CTL_VAL                                   0x00ffffff
 #define NV50_SOR_DP_CTRL(i, l)           (0x0061c10c + (i) * 0x800 + (l) * 0x80)
 #define NV50_SOR_DP_CTRL_ENABLED                                     0x00000001
 #define NV50_SOR_DP_CTRL_ENHANCED_FRAME_ENABLED                      0x00004000
@@ -843,7 +846,7 @@
 #define NV50_SOR_DP_CTRL_TRAINING_PATTERN_2                          0x02000000
 #define NV50_SOR_DP_UNK118(i, l)         (0x0061c118 + (i) * 0x800 + (l) * 0x80)
 #define NV50_SOR_DP_UNK120(i, l)         (0x0061c120 + (i) * 0x800 + (l) * 0x80)
-#define NV50_SOR_DP_UNK128(i, l)         (0x0061c128 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_SCFG(i, l)           (0x0061c128 + (i) * 0x800 + (l) * 0x80)
 #define NV50_SOR_DP_UNK130(i, l)         (0x0061c130 + (i) * 0x800 + (l) * 0x80)
 
 #define NV50_PDISPLAY_USER(i)                        ((i) * 0x1000 + 0x00640000)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 2706cb3..b75258a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -12,8 +12,8 @@
 	struct drm_device *dev;
 
 	dma_addr_t *pages;
-	bool *ttm_alloced;
 	unsigned nr_pages;
+	bool unmap_pages;
 
 	u64 offset;
 	bool bound;
@@ -26,43 +26,28 @@
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
 	struct drm_device *dev = nvbe->dev;
+	int i;
 
 	NV_DEBUG(nvbe->dev, "num_pages = %ld\n", num_pages);
 
-	if (nvbe->pages)
-		return -EINVAL;
+	nvbe->pages = dma_addrs;
+	nvbe->nr_pages = num_pages;
+	nvbe->unmap_pages = true;
 
-	nvbe->pages = kmalloc(sizeof(dma_addr_t) * num_pages, GFP_KERNEL);
-	if (!nvbe->pages)
-		return -ENOMEM;
-
-	nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
-	if (!nvbe->ttm_alloced) {
-		kfree(nvbe->pages);
-		nvbe->pages = NULL;
-		return -ENOMEM;
+	/* this code path isn't called and is incorrect anyways */
+	if (0) { /* dma_addrs[0] != DMA_ERROR_CODE) { */
+		nvbe->unmap_pages = false;
+		return 0;
 	}
 
-	nvbe->nr_pages = 0;
-	while (num_pages--) {
-		/* this code path isn't called and is incorrect anyways */
-		if (0) { /*dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE)*/
-			nvbe->pages[nvbe->nr_pages] =
-					dma_addrs[nvbe->nr_pages];
-		 	nvbe->ttm_alloced[nvbe->nr_pages] = true;
-		} else {
-			nvbe->pages[nvbe->nr_pages] =
-				pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
-				     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-			if (pci_dma_mapping_error(dev->pdev,
-						  nvbe->pages[nvbe->nr_pages])) {
-				be->func->clear(be);
-				return -EFAULT;
-			}
-			nvbe->ttm_alloced[nvbe->nr_pages] = false;
+	for (i = 0; i < num_pages; i++) {
+		nvbe->pages[i] = pci_map_page(dev->pdev, pages[i], 0,
+					      PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		if (pci_dma_mapping_error(dev->pdev, nvbe->pages[i])) {
+			nvbe->nr_pages = --i;
+			be->func->clear(be);
+			return -EFAULT;
 		}
-
-		nvbe->nr_pages++;
 	}
 
 	return 0;
@@ -72,25 +57,16 @@
 nouveau_sgdma_clear(struct ttm_backend *be)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
-	struct drm_device *dev;
+	struct drm_device *dev = nvbe->dev;
 
-	if (nvbe && nvbe->pages) {
-		dev = nvbe->dev;
-		NV_DEBUG(dev, "\n");
+	if (nvbe->bound)
+		be->func->unbind(be);
 
-		if (nvbe->bound)
-			be->func->unbind(be);
-
+	if (nvbe->unmap_pages) {
 		while (nvbe->nr_pages--) {
-			if (!nvbe->ttm_alloced[nvbe->nr_pages])
-				pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
+			pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
 				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 		}
-		kfree(nvbe->pages);
-		kfree(nvbe->ttm_alloced);
-		nvbe->pages = NULL;
-		nvbe->ttm_alloced = NULL;
-		nvbe->nr_pages = 0;
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 10656e4..82478e0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -286,9 +286,9 @@
 		engine->gpio.get		= nv10_gpio_get;
 		engine->gpio.set		= nv10_gpio_set;
 		engine->gpio.irq_enable		= NULL;
-		engine->pm.clock_get		= nv04_pm_clock_get;
-		engine->pm.clock_pre		= nv04_pm_clock_pre;
-		engine->pm.clock_set		= nv04_pm_clock_set;
+		engine->pm.clocks_get		= nv40_pm_clocks_get;
+		engine->pm.clocks_pre		= nv40_pm_clocks_pre;
+		engine->pm.clocks_set		= nv40_pm_clocks_set;
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
 		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
 		engine->pm.temp_get		= nv40_temp_get;
@@ -299,7 +299,7 @@
 	case 0x50:
 	case 0x80: /* gotta love NVIDIA's consistency.. */
 	case 0x90:
-	case 0xA0:
+	case 0xa0:
 		engine->instmem.init		= nv50_instmem_init;
 		engine->instmem.takedown	= nv50_instmem_takedown;
 		engine->instmem.suspend		= nv50_instmem_suspend;
@@ -359,9 +359,9 @@
 			engine->pm.clock_set	= nv50_pm_clock_set;
 			break;
 		default:
-			engine->pm.clock_get	= nva3_pm_clock_get;
-			engine->pm.clock_pre	= nva3_pm_clock_pre;
-			engine->pm.clock_set	= nva3_pm_clock_set;
+			engine->pm.clocks_get	= nva3_pm_clocks_get;
+			engine->pm.clocks_pre	= nva3_pm_clocks_pre;
+			engine->pm.clocks_set	= nva3_pm_clocks_set;
 			break;
 		}
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
@@ -376,7 +376,7 @@
 		engine->vram.put		= nv50_vram_del;
 		engine->vram.flags_valid	= nv50_vram_flags_valid;
 		break;
-	case 0xC0:
+	case 0xc0:
 		engine->instmem.init		= nvc0_instmem_init;
 		engine->instmem.takedown	= nvc0_instmem_takedown;
 		engine->instmem.suspend		= nvc0_instmem_suspend;
@@ -422,12 +422,73 @@
 		engine->vram.put		= nv50_vram_del;
 		engine->vram.flags_valid	= nvc0_vram_flags_valid;
 		engine->pm.temp_get		= nv84_temp_get;
+		engine->pm.clocks_get		= nvc0_pm_clocks_get;
+		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
+		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
+		break;
+	case 0xd0:
+		engine->instmem.init		= nvc0_instmem_init;
+		engine->instmem.takedown	= nvc0_instmem_takedown;
+		engine->instmem.suspend		= nvc0_instmem_suspend;
+		engine->instmem.resume		= nvc0_instmem_resume;
+		engine->instmem.get		= nv50_instmem_get;
+		engine->instmem.put		= nv50_instmem_put;
+		engine->instmem.map		= nv50_instmem_map;
+		engine->instmem.unmap		= nv50_instmem_unmap;
+		engine->instmem.flush		= nv84_instmem_flush;
+		engine->mc.init			= nv50_mc_init;
+		engine->mc.takedown		= nv50_mc_takedown;
+		engine->timer.init		= nv04_timer_init;
+		engine->timer.read		= nv04_timer_read;
+		engine->timer.takedown		= nv04_timer_takedown;
+		engine->fb.init			= nvc0_fb_init;
+		engine->fb.takedown		= nvc0_fb_takedown;
+		engine->fifo.channels		= 128;
+		engine->fifo.init		= nvc0_fifo_init;
+		engine->fifo.takedown		= nvc0_fifo_takedown;
+		engine->fifo.disable		= nvc0_fifo_disable;
+		engine->fifo.enable		= nvc0_fifo_enable;
+		engine->fifo.reassign		= nvc0_fifo_reassign;
+		engine->fifo.channel_id		= nvc0_fifo_channel_id;
+		engine->fifo.create_context	= nvc0_fifo_create_context;
+		engine->fifo.destroy_context	= nvc0_fifo_destroy_context;
+		engine->fifo.load_context	= nvc0_fifo_load_context;
+		engine->fifo.unload_context	= nvc0_fifo_unload_context;
+		engine->display.early_init	= nouveau_stub_init;
+		engine->display.late_takedown	= nouveau_stub_takedown;
+		engine->display.create		= nvd0_display_create;
+		engine->display.init		= nvd0_display_init;
+		engine->display.destroy		= nvd0_display_destroy;
+		engine->gpio.init		= nv50_gpio_init;
+		engine->gpio.takedown		= nouveau_stub_takedown;
+		engine->gpio.get		= nvd0_gpio_get;
+		engine->gpio.set		= nvd0_gpio_set;
+		engine->gpio.irq_register	= nv50_gpio_irq_register;
+		engine->gpio.irq_unregister	= nv50_gpio_irq_unregister;
+		engine->gpio.irq_enable		= nv50_gpio_irq_enable;
+		engine->vram.init		= nvc0_vram_init;
+		engine->vram.takedown		= nv50_vram_fini;
+		engine->vram.get		= nvc0_vram_new;
+		engine->vram.put		= nv50_vram_del;
+		engine->vram.flags_valid	= nvc0_vram_flags_valid;
+		engine->pm.clocks_get		= nvc0_pm_clocks_get;
+		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
+		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
 		break;
 	default:
 		NV_ERROR(dev, "NV%02x unsupported\n", dev_priv->chipset);
 		return 1;
 	}
 
+	/* headless mode */
+	if (nouveau_modeset == 2) {
+		engine->display.early_init = nouveau_stub_init;
+		engine->display.late_takedown = nouveau_stub_takedown;
+		engine->display.create = nouveau_stub_init;
+		engine->display.init = nouveau_stub_init;
+		engine->display.destroy = nouveau_stub_takedown;
+	}
+
 	return 0;
 }
 
@@ -449,21 +510,6 @@
 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }
 
-static int
-nouveau_card_init_channel(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = nouveau_channel_alloc(dev, &dev_priv->channel, NULL,
-				    NvDmaFB, NvDmaTT);
-	if (ret)
-		return ret;
-
-	mutex_unlock(&dev_priv->channel->mutex);
-	return 0;
-}
-
 static void nouveau_switcheroo_set_state(struct pci_dev *pdev,
 					 enum vga_switcheroo_state state)
 {
@@ -630,8 +676,11 @@
 			break;
 		}
 
-		if (dev_priv->card_type == NV_40)
-			nv40_mpeg_create(dev);
+		if (dev_priv->card_type == NV_40 ||
+		    dev_priv->chipset == 0x31 ||
+		    dev_priv->chipset == 0x34 ||
+		    dev_priv->chipset == 0x36)
+			nv31_mpeg_create(dev);
 		else
 		if (dev_priv->card_type == NV_50 &&
 		    (dev_priv->chipset < 0x98 || dev_priv->chipset == 0xa0))
@@ -651,41 +700,69 @@
 			goto out_engine;
 	}
 
-	ret = engine->display.create(dev);
+	ret = nouveau_irq_init(dev);
 	if (ret)
 		goto out_fifo;
 
-	ret = drm_vblank_init(dev, nv_two_heads(dev) ? 2 : 1);
-	if (ret)
-		goto out_vblank;
+	/* initialise general modesetting */
+	drm_mode_config_init(dev);
+	drm_mode_create_scaling_mode_property(dev);
+	drm_mode_create_dithering_property(dev);
+	dev->mode_config.funcs = (void *)&nouveau_mode_config_funcs;
+	dev->mode_config.fb_base = pci_resource_start(dev->pdev, 1);
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+	if (dev_priv->card_type < NV_10) {
+		dev->mode_config.max_width = 2048;
+		dev->mode_config.max_height = 2048;
+	} else
+	if (dev_priv->card_type < NV_50) {
+		dev->mode_config.max_width = 4096;
+		dev->mode_config.max_height = 4096;
+	} else {
+		dev->mode_config.max_width = 8192;
+		dev->mode_config.max_height = 8192;
+	}
 
-	ret = nouveau_irq_init(dev);
+	ret = engine->display.create(dev);
 	if (ret)
-		goto out_vblank;
+		goto out_irq;
 
-	/* what about PVIDEO/PCRTC/PRAMDAC etc? */
+	nouveau_backlight_init(dev);
 
 	if (dev_priv->eng[NVOBJ_ENGINE_GR]) {
 		ret = nouveau_fence_init(dev);
 		if (ret)
-			goto out_irq;
+			goto out_disp;
 
-		ret = nouveau_card_init_channel(dev);
+		ret = nouveau_channel_alloc(dev, &dev_priv->channel, NULL,
+					    NvDmaFB, NvDmaTT);
 		if (ret)
 			goto out_fence;
+
+		mutex_unlock(&dev_priv->channel->mutex);
 	}
 
-	nouveau_fbcon_init(dev);
-	drm_kms_helper_poll_init(dev);
+	if (dev->mode_config.num_crtc) {
+		ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
+		if (ret)
+			goto out_chan;
+
+		nouveau_fbcon_init(dev);
+		drm_kms_helper_poll_init(dev);
+	}
+
 	return 0;
 
+out_chan:
+	nouveau_channel_put_unlocked(&dev_priv->channel);
 out_fence:
 	nouveau_fence_fini(dev);
+out_disp:
+	nouveau_backlight_exit(dev);
+	engine->display.destroy(dev);
 out_irq:
 	nouveau_irq_fini(dev);
-out_vblank:
-	drm_vblank_cleanup(dev);
-	engine->display.destroy(dev);
 out_fifo:
 	if (!dev_priv->noaccel)
 		engine->fifo.takedown(dev);
@@ -732,15 +809,20 @@
 	struct nouveau_engine *engine = &dev_priv->engine;
 	int e;
 
-	drm_kms_helper_poll_fini(dev);
-	nouveau_fbcon_fini(dev);
+	if (dev->mode_config.num_crtc) {
+		drm_kms_helper_poll_fini(dev);
+		nouveau_fbcon_fini(dev);
+		drm_vblank_cleanup(dev);
+	}
 
 	if (dev_priv->channel) {
 		nouveau_channel_put_unlocked(&dev_priv->channel);
 		nouveau_fence_fini(dev);
 	}
 
+	nouveau_backlight_exit(dev);
 	engine->display.destroy(dev);
+	drm_mode_config_cleanup(dev);
 
 	if (!dev_priv->noaccel) {
 		engine->fifo.takedown(dev);
@@ -774,7 +856,6 @@
 	engine->vram.takedown(dev);
 
 	nouveau_irq_fini(dev);
-	drm_vblank_cleanup(dev);
 
 	nouveau_pm_fini(dev);
 	nouveau_bios_takedown(dev);
@@ -907,7 +988,7 @@
 int nouveau_load(struct drm_device *dev, unsigned long flags)
 {
 	struct drm_nouveau_private *dev_priv;
-	uint32_t reg0;
+	uint32_t reg0, strap;
 	resource_size_t mmio_start_offs;
 	int ret;
 
@@ -951,13 +1032,11 @@
 
 	/* Time to determine the card architecture */
 	reg0 = nv_rd32(dev, NV03_PMC_BOOT_0);
-	dev_priv->stepping = 0; /* XXX: add stepping for pre-NV10? */
 
 	/* We're dealing with >=NV10 */
 	if ((reg0 & 0x0f000000) > 0) {
 		/* Bit 27-20 contain the architecture in hex */
 		dev_priv->chipset = (reg0 & 0xff00000) >> 20;
-		dev_priv->stepping = (reg0 & 0xff);
 	/* NV04 or NV05 */
 	} else if ((reg0 & 0xff00fff0) == 0x20004000) {
 		if (reg0 & 0x00f00000)
@@ -987,6 +1066,9 @@
 	case 0xc0:
 		dev_priv->card_type = NV_C0;
 		break;
+	case 0xd0:
+		dev_priv->card_type = NV_D0;
+		break;
 	default:
 		NV_INFO(dev, "Unsupported chipset 0x%08x\n", reg0);
 		ret = -EINVAL;
@@ -996,6 +1078,23 @@
 	NV_INFO(dev, "Detected an NV%2x generation card (0x%08x)\n",
 		dev_priv->card_type, reg0);
 
+	/* determine frequency of timing crystal */
+	strap = nv_rd32(dev, 0x101000);
+	if ( dev_priv->chipset < 0x17 ||
+	    (dev_priv->chipset >= 0x20 && dev_priv->chipset <= 0x25))
+		strap &= 0x00000040;
+	else
+		strap &= 0x00400040;
+
+	switch (strap) {
+	case 0x00000000: dev_priv->crystal = 13500; break;
+	case 0x00000040: dev_priv->crystal = 14318; break;
+	case 0x00400000: dev_priv->crystal = 27000; break;
+	case 0x00400040: dev_priv->crystal = 25000; break;
+	}
+
+	NV_DEBUG(dev, "crystal freq: %dKHz\n", dev_priv->crystal);
+
 	/* Determine whether we'll attempt acceleration or not, some
 	 * cards are disabled by default here due to them being known
 	 * non-functional, or never been tested due to lack of hw.
@@ -1030,7 +1129,7 @@
 			ioremap(pci_resource_start(dev->pdev, ramin_bar),
 				dev_priv->ramin_size);
 		if (!dev_priv->ramin) {
-			NV_ERROR(dev, "Failed to PRAMIN BAR");
+			NV_ERROR(dev, "Failed to map PRAMIN BAR\n");
 			ret = -ENOMEM;
 			goto err_mmio;
 		}
@@ -1130,7 +1229,7 @@
 		getparam->value = 1;
 		break;
 	case NOUVEAU_GETPARAM_HAS_PAGEFLIP:
-		getparam->value = 1;
+		getparam->value = dev_priv->card_type < NV_D0;
 		break;
 	case NOUVEAU_GETPARAM_GRAPH_UNITS:
 		/* NV40 and NV50 versions are quite different, but register
@@ -1198,6 +1297,23 @@
 	return false;
 }
 
+/* Wait until cond(data) == true, up until timeout has hit */
+bool
+nouveau_wait_cb(struct drm_device *dev, u64 timeout,
+		bool (*cond)(void *), void *data)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
+	u64 start = ptimer->read(dev);
+
+	do {
+		if (cond(data) == true)
+			return true;
+	} while (ptimer->read(dev) - start < timeout);
+
+	return false;
+}
+
 /* Waits for PGRAPH to go completely idle */
 bool nouveau_wait_for_idle(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.c b/drivers/gpu/drm/nouveau/nouveau_vm.c
index 244fd38..ef0832b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vm.c
@@ -172,9 +172,9 @@
 			vm->map_pgt(vpgd->obj, pde, vpgt->obj);
 		}
 
-		mutex_unlock(&vm->mm->mutex);
+		mutex_unlock(&vm->mm.mutex);
 		nouveau_gpuobj_ref(NULL, &pgt);
-		mutex_lock(&vm->mm->mutex);
+		mutex_lock(&vm->mm.mutex);
 	}
 }
 
@@ -191,18 +191,18 @@
 	pgt_size  = (1 << (vm->pgt_bits + 12)) >> type;
 	pgt_size *= 8;
 
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 	ret = nouveau_gpuobj_new(vm->dev, NULL, pgt_size, 0x1000,
 				 NVOBJ_FLAG_ZERO_ALLOC, &pgt);
-	mutex_lock(&vm->mm->mutex);
+	mutex_lock(&vm->mm.mutex);
 	if (unlikely(ret))
 		return ret;
 
 	/* someone beat us to filling the PDE while we didn't have the lock */
 	if (unlikely(vpgt->refcount[big]++)) {
-		mutex_unlock(&vm->mm->mutex);
+		mutex_unlock(&vm->mm.mutex);
 		nouveau_gpuobj_ref(NULL, &pgt);
-		mutex_lock(&vm->mm->mutex);
+		mutex_lock(&vm->mm.mutex);
 		return 0;
 	}
 
@@ -223,10 +223,10 @@
 	u32 fpde, lpde, pde;
 	int ret;
 
-	mutex_lock(&vm->mm->mutex);
-	ret = nouveau_mm_get(vm->mm, page_shift, msize, 0, align, &vma->node);
+	mutex_lock(&vm->mm.mutex);
+	ret = nouveau_mm_get(&vm->mm, page_shift, msize, 0, align, &vma->node);
 	if (unlikely(ret != 0)) {
-		mutex_unlock(&vm->mm->mutex);
+		mutex_unlock(&vm->mm.mutex);
 		return ret;
 	}
 
@@ -245,13 +245,13 @@
 		if (ret) {
 			if (pde != fpde)
 				nouveau_vm_unmap_pgt(vm, big, fpde, pde - 1);
-			nouveau_mm_put(vm->mm, vma->node);
-			mutex_unlock(&vm->mm->mutex);
+			nouveau_mm_put(&vm->mm, vma->node);
+			mutex_unlock(&vm->mm.mutex);
 			vma->node = NULL;
 			return ret;
 		}
 	}
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 
 	vma->vm     = vm;
 	vma->offset = (u64)vma->node->offset << 12;
@@ -270,11 +270,11 @@
 	fpde = (vma->node->offset >> vm->pgt_bits);
 	lpde = (vma->node->offset + vma->node->length - 1) >> vm->pgt_bits;
 
-	mutex_lock(&vm->mm->mutex);
+	mutex_lock(&vm->mm.mutex);
 	nouveau_vm_unmap_pgt(vm, vma->node->type != vm->spg_shift, fpde, lpde);
-	nouveau_mm_put(vm->mm, vma->node);
+	nouveau_mm_put(&vm->mm, vma->node);
 	vma->node = NULL;
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 }
 
 int
@@ -306,7 +306,7 @@
 			block = length;
 
 	} else
-	if (dev_priv->card_type == NV_C0) {
+	if (dev_priv->card_type >= NV_C0) {
 		vm->map_pgt = nvc0_vm_map_pgt;
 		vm->map = nvc0_vm_map;
 		vm->map_sg = nvc0_vm_map_sg;
@@ -360,11 +360,11 @@
 
 	nouveau_gpuobj_ref(pgd, &vpgd->obj);
 
-	mutex_lock(&vm->mm->mutex);
+	mutex_lock(&vm->mm.mutex);
 	for (i = vm->fpde; i <= vm->lpde; i++)
 		vm->map_pgt(pgd, i, vm->pgt[i - vm->fpde].obj);
 	list_add(&vpgd->head, &vm->pgd_list);
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 	return 0;
 }
 
@@ -377,7 +377,7 @@
 	if (!mpgd)
 		return;
 
-	mutex_lock(&vm->mm->mutex);
+	mutex_lock(&vm->mm.mutex);
 	list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) {
 		if (vpgd->obj == mpgd) {
 			pgd = vpgd->obj;
@@ -386,7 +386,7 @@
 			break;
 		}
 	}
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 
 	nouveau_gpuobj_ref(NULL, &pgd);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.h b/drivers/gpu/drm/nouveau/nouveau_vm.h
index 579ca8c..6ce995f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_vm.h
@@ -51,7 +51,7 @@
 
 struct nouveau_vm {
 	struct drm_device *dev;
-	struct nouveau_mm *mm;
+	struct nouveau_mm mm;
 	int refcount;
 
 	struct list_head pgd_list;
diff --git a/drivers/gpu/drm/nouveau/nouveau_volt.c b/drivers/gpu/drm/nouveau/nouveau_volt.c
index 75e87274..86d03e1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_volt.c
+++ b/drivers/gpu/drm/nouveau/nouveau_volt.c
@@ -27,7 +27,7 @@
 #include "nouveau_drv.h"
 #include "nouveau_pm.h"
 
-static const enum dcb_gpio_tag vidtag[] = { 0x04, 0x05, 0x06, 0x1a };
+static const enum dcb_gpio_tag vidtag[] = { 0x04, 0x05, 0x06, 0x1a, 0x73 };
 static int nr_vidtag = sizeof(vidtag) / sizeof(vidtag[0]);
 
 int
@@ -170,6 +170,13 @@
 		 */
 		vidshift  = 2;
 		break;
+	case 0x40:
+		headerlen = volt[1];
+		recordlen = volt[2];
+		entries   = volt[3]; /* not a clue what the entries are for.. */
+		vidmask   = volt[11]; /* guess.. */
+		vidshift  = 0;
+		break;
 	default:
 		NV_WARN(dev, "voltage table 0x%02x unknown\n", volt[0]);
 		return;
@@ -197,16 +204,37 @@
 	}
 
 	/* parse vbios entries into common format */
-	voltage->level = kcalloc(entries, sizeof(*voltage->level), GFP_KERNEL);
-	if (!voltage->level)
-		return;
+	voltage->version = volt[0];
+	if (voltage->version < 0x40) {
+		voltage->nr_level = entries;
+		voltage->level =
+			kcalloc(entries, sizeof(*voltage->level), GFP_KERNEL);
+		if (!voltage->level)
+			return;
 
-	entry = volt + headerlen;
-	for (i = 0; i < entries; i++, entry += recordlen) {
-		voltage->level[i].voltage = entry[0];
-		voltage->level[i].vid     = entry[1] >> vidshift;
+		entry = volt + headerlen;
+		for (i = 0; i < entries; i++, entry += recordlen) {
+			voltage->level[i].voltage = entry[0] * 10000;
+			voltage->level[i].vid     = entry[1] >> vidshift;
+		}
+	} else {
+		u32 volt_uv = ROM32(volt[4]);
+		s16 step_uv = ROM16(volt[8]);
+		u8 vid;
+
+		voltage->nr_level = voltage->vid_mask + 1;
+		voltage->level = kcalloc(voltage->nr_level,
+					 sizeof(*voltage->level), GFP_KERNEL);
+		if (!voltage->level)
+			return;
+
+		for (vid = 0; vid <= voltage->vid_mask; vid++) {
+			voltage->level[vid].voltage = volt_uv;
+			voltage->level[vid].vid = vid;
+			volt_uv += step_uv;
+		}
 	}
-	voltage->nr_level  = entries;
+
 	voltage->supported = true;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv04_display.c b/drivers/gpu/drm/nouveau/nv04_display.c
index 1715e14..6bd8518 100644
--- a/drivers/gpu/drm/nouveau/nv04_display.c
+++ b/drivers/gpu/drm/nouveau/nv04_display.c
@@ -126,27 +126,6 @@
 
 	nouveau_hw_save_vga_fonts(dev, 1);
 
-	drm_mode_config_init(dev);
-	drm_mode_create_scaling_mode_property(dev);
-	drm_mode_create_dithering_property(dev);
-
-	dev->mode_config.funcs = (void *)&nouveau_mode_config_funcs;
-
-	dev->mode_config.min_width = 0;
-	dev->mode_config.min_height = 0;
-	switch (dev_priv->card_type) {
-	case NV_04:
-		dev->mode_config.max_width = 2048;
-		dev->mode_config.max_height = 2048;
-		break;
-	default:
-		dev->mode_config.max_width = 4096;
-		dev->mode_config.max_height = 4096;
-		break;
-	}
-
-	dev->mode_config.fb_base = dev_priv->fb_phys;
-
 	nv04_crtc_create(dev, 0);
 	if (nv_two_heads(dev))
 		nv04_crtc_create(dev, 1);
@@ -235,8 +214,6 @@
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
 		crtc->funcs->restore(crtc);
 
-	drm_mode_config_cleanup(dev);
-
 	nouveau_hw_save_vga_fonts(dev, 0);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv04_pm.c b/drivers/gpu/drm/nouveau/nv04_pm.c
index eb1c70d..9ae92a8 100644
--- a/drivers/gpu/drm/nouveau/nv04_pm.c
+++ b/drivers/gpu/drm/nouveau/nv04_pm.c
@@ -68,6 +68,7 @@
 nv04_pm_clock_set(struct drm_device *dev, void *pre_state)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
 	struct nv04_pm_state *state = pre_state;
 	u32 reg = state->pll.reg;
 
@@ -85,6 +86,9 @@
 		nv_mask(dev, 0x1002c0, 0, 1 << 8);
 	}
 
+	if (reg == NV_PRAMDAC_NVPLL_COEFF)
+		ptimer->init(dev);
+
 	kfree(state);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv04_timer.c b/drivers/gpu/drm/nouveau/nv04_timer.c
index 1d09ddd..263301b 100644
--- a/drivers/gpu/drm/nouveau/nv04_timer.c
+++ b/drivers/gpu/drm/nouveau/nv04_timer.c
@@ -6,43 +6,75 @@
 int
 nv04_timer_init(struct drm_device *dev)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	u32 m, n, d;
+
 	nv_wr32(dev, NV04_PTIMER_INTR_EN_0, 0x00000000);
 	nv_wr32(dev, NV04_PTIMER_INTR_0, 0xFFFFFFFF);
 
-	/* Just use the pre-existing values when possible for now; these regs
-	 * are not written in nv (driver writer missed a /4 on the address), and
-	 * writing 8 and 3 to the correct regs breaks the timings on the LVDS
-	 * hardware sequencing microcode.
-	 * A correct solution (involving calculations with the GPU PLL) can
-	 * be done when kernel modesetting lands
-	 */
-	if (!nv_rd32(dev, NV04_PTIMER_NUMERATOR) ||
-				!nv_rd32(dev, NV04_PTIMER_DENOMINATOR)) {
-		nv_wr32(dev, NV04_PTIMER_NUMERATOR, 0x00000008);
-		nv_wr32(dev, NV04_PTIMER_DENOMINATOR, 0x00000003);
+	/* aim for 31.25MHz, which gives us nanosecond timestamps */
+	d = 1000000 / 32;
+
+	/* determine base clock for timer source */
+	if (dev_priv->chipset < 0x40) {
+		n = dev_priv->engine.pm.clock_get(dev, PLL_CORE);
+	} else
+	if (dev_priv->chipset == 0x40) {
+		/*XXX: figure this out */
+		n = 0;
+	} else {
+		n = dev_priv->crystal;
+		m = 1;
+		while (n < (d * 2)) {
+			n += (n / m);
+			m++;
+		}
+
+		nv_wr32(dev, 0x009220, m - 1);
 	}
 
+	if (!n) {
+		NV_WARN(dev, "PTIMER: unknown input clock freq\n");
+		if (!nv_rd32(dev, NV04_PTIMER_NUMERATOR) ||
+		    !nv_rd32(dev, NV04_PTIMER_DENOMINATOR)) {
+			nv_wr32(dev, NV04_PTIMER_NUMERATOR, 1);
+			nv_wr32(dev, NV04_PTIMER_DENOMINATOR, 1);
+		}
+		return 0;
+	}
+
+	/* reduce ratio to acceptable values */
+	while (((n % 5) == 0) && ((d % 5) == 0)) {
+		n /= 5;
+		d /= 5;
+	}
+
+	while (((n % 2) == 0) && ((d % 2) == 0)) {
+		n /= 2;
+		d /= 2;
+	}
+
+	while (n > 0xffff || d > 0xffff) {
+		n >>= 1;
+		d >>= 1;
+	}
+
+	nv_wr32(dev, NV04_PTIMER_NUMERATOR, n);
+	nv_wr32(dev, NV04_PTIMER_DENOMINATOR, d);
 	return 0;
 }
 
-uint64_t
+u64
 nv04_timer_read(struct drm_device *dev)
 {
-	uint32_t low;
-	/* From kmmio dumps on nv28 this looks like how the blob does this.
-	 * It reads the high dword twice, before and after.
-	 * The only explanation seems to be that the 64-bit timer counter
-	 * advances between high and low dword reads and may corrupt the
-	 * result. Not confirmed.
-	 */
-	uint32_t high2 = nv_rd32(dev, NV04_PTIMER_TIME_1);
-	uint32_t high1;
+	u32 hi, lo;
+
 	do {
-		high1 = high2;
-		low = nv_rd32(dev, NV04_PTIMER_TIME_0);
-		high2 = nv_rd32(dev, NV04_PTIMER_TIME_1);
-	} while (high1 != high2);
-	return (((uint64_t)high2) << 32) | (uint64_t)low;
+		hi = nv_rd32(dev, NV04_PTIMER_TIME_1);
+		lo = nv_rd32(dev, NV04_PTIMER_TIME_0);
+	} while (hi != nv_rd32(dev, NV04_PTIMER_TIME_1));
+
+	return ((u64)hi << 32 | lo);
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nv40_mpeg.c b/drivers/gpu/drm/nouveau/nv31_mpeg.c
similarity index 76%
rename from drivers/gpu/drm/nouveau/nv40_mpeg.c
rename to drivers/gpu/drm/nouveau/nv31_mpeg.c
index ad03a0e..6f06a07 100644
--- a/drivers/gpu/drm/nouveau/nv40_mpeg.c
+++ b/drivers/gpu/drm/nouveau/nv31_mpeg.c
@@ -26,10 +26,32 @@
 #include "nouveau_drv.h"
 #include "nouveau_ramht.h"
 
-struct nv40_mpeg_engine {
+struct nv31_mpeg_engine {
 	struct nouveau_exec_engine base;
+	atomic_t refcount;
 };
 
+
+static int
+nv31_mpeg_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv31_mpeg_engine *pmpeg = nv_engine(chan->dev, engine);
+
+	if (!atomic_add_unless(&pmpeg->refcount, 1, 1))
+		return -EBUSY;
+
+	chan->engctx[engine] = (void *)0xdeadcafe;
+	return 0;
+}
+
+static void
+nv31_mpeg_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv31_mpeg_engine *pmpeg = nv_engine(chan->dev, engine);
+	atomic_dec(&pmpeg->refcount);
+	chan->engctx[engine] = NULL;
+}
+
 static int
 nv40_mpeg_context_new(struct nouveau_channel *chan, int engine)
 {
@@ -81,7 +103,7 @@
 }
 
 static int
-nv40_mpeg_object_new(struct nouveau_channel *chan, int engine,
+nv31_mpeg_object_new(struct nouveau_channel *chan, int engine,
 		      u32 handle, u16 class)
 {
 	struct drm_device *dev = chan->dev;
@@ -103,10 +125,10 @@
 }
 
 static int
-nv40_mpeg_init(struct drm_device *dev, int engine)
+nv31_mpeg_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nv40_mpeg_engine *pmpeg = nv_engine(dev, engine);
+	struct nv31_mpeg_engine *pmpeg = nv_engine(dev, engine);
 	int i;
 
 	/* VPE init */
@@ -121,7 +143,7 @@
 	/* PMPEG init */
 	nv_wr32(dev, 0x00b32c, 0x00000000);
 	nv_wr32(dev, 0x00b314, 0x00000100);
-	nv_wr32(dev, 0x00b220, 0x00000044);
+	nv_wr32(dev, 0x00b220, nv44_graph_class(dev) ? 0x00000044 : 0x00000031);
 	nv_wr32(dev, 0x00b300, 0x02001ec1);
 	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000001);
 
@@ -137,7 +159,7 @@
 }
 
 static int
-nv40_mpeg_fini(struct drm_device *dev, int engine, bool suspend)
+nv31_mpeg_fini(struct drm_device *dev, int engine, bool suspend)
 {
 	/*XXX: context save? */
 	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000);
@@ -146,7 +168,7 @@
 }
 
 static int
-nv40_mpeg_mthd_dma(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+nv31_mpeg_mthd_dma(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
 {
 	struct drm_device *dev = chan->dev;
 	u32 inst = data << 4;
@@ -184,13 +206,17 @@
 }
 
 static int
-nv40_mpeg_isr_chid(struct drm_device *dev, u32 inst)
+nv31_mpeg_isr_chid(struct drm_device *dev, u32 inst)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ctx;
 	unsigned long flags;
 	int i;
 
+	/* hardcode drm channel id on nv3x, so swmthd lookup works */
+	if (dev_priv->card_type < NV_40)
+		return 0;
+
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
 	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
 		if (!dev_priv->channels.ptr[i])
@@ -205,7 +231,7 @@
 }
 
 static void
-nv40_vpe_set_tile_region(struct drm_device *dev, int i)
+nv31_vpe_set_tile_region(struct drm_device *dev, int i)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
@@ -216,10 +242,10 @@
 }
 
 static void
-nv40_mpeg_isr(struct drm_device *dev)
+nv31_mpeg_isr(struct drm_device *dev)
 {
 	u32 inst = (nv_rd32(dev, 0x00b318) & 0x000fffff) << 4;
-	u32 chid = nv40_mpeg_isr_chid(dev, inst);
+	u32 chid = nv31_mpeg_isr_chid(dev, inst);
 	u32 stat = nv_rd32(dev, 0x00b100);
 	u32 type = nv_rd32(dev, 0x00b230);
 	u32 mthd = nv_rd32(dev, 0x00b234);
@@ -249,10 +275,10 @@
 }
 
 static void
-nv40_vpe_isr(struct drm_device *dev)
+nv31_vpe_isr(struct drm_device *dev)
 {
 	if (nv_rd32(dev, 0x00b100))
-		nv40_mpeg_isr(dev);
+		nv31_mpeg_isr(dev);
 
 	if (nv_rd32(dev, 0x00b800)) {
 		u32 stat = nv_rd32(dev, 0x00b800);
@@ -262,9 +288,9 @@
 }
 
 static void
-nv40_mpeg_destroy(struct drm_device *dev, int engine)
+nv31_mpeg_destroy(struct drm_device *dev, int engine)
 {
-	struct nv40_mpeg_engine *pmpeg = nv_engine(dev, engine);
+	struct nv31_mpeg_engine *pmpeg = nv_engine(dev, engine);
 
 	nouveau_irq_unregister(dev, 0);
 
@@ -273,34 +299,41 @@
 }
 
 int
-nv40_mpeg_create(struct drm_device *dev)
+nv31_mpeg_create(struct drm_device *dev)
 {
-	struct nv40_mpeg_engine *pmpeg;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv31_mpeg_engine *pmpeg;
 
 	pmpeg = kzalloc(sizeof(*pmpeg), GFP_KERNEL);
 	if (!pmpeg)
 		return -ENOMEM;
+	atomic_set(&pmpeg->refcount, 0);
 
-	pmpeg->base.destroy = nv40_mpeg_destroy;
-	pmpeg->base.init = nv40_mpeg_init;
-	pmpeg->base.fini = nv40_mpeg_fini;
-	pmpeg->base.context_new = nv40_mpeg_context_new;
-	pmpeg->base.context_del = nv40_mpeg_context_del;
-	pmpeg->base.object_new = nv40_mpeg_object_new;
+	pmpeg->base.destroy = nv31_mpeg_destroy;
+	pmpeg->base.init = nv31_mpeg_init;
+	pmpeg->base.fini = nv31_mpeg_fini;
+	if (dev_priv->card_type < NV_40) {
+		pmpeg->base.context_new = nv31_mpeg_context_new;
+		pmpeg->base.context_del = nv31_mpeg_context_del;
+	} else {
+		pmpeg->base.context_new = nv40_mpeg_context_new;
+		pmpeg->base.context_del = nv40_mpeg_context_del;
+	}
+	pmpeg->base.object_new = nv31_mpeg_object_new;
 
 	/* ISR vector, PMC_ENABLE bit,  and TILE regs are shared between
 	 * all VPE engines, for this driver's purposes the PMPEG engine
 	 * will be treated as the "master" and handle the global VPE
 	 * bits too
 	 */
-	pmpeg->base.set_tile_region = nv40_vpe_set_tile_region;
-	nouveau_irq_register(dev, 0, nv40_vpe_isr);
+	pmpeg->base.set_tile_region = nv31_vpe_set_tile_region;
+	nouveau_irq_register(dev, 0, nv31_vpe_isr);
 
 	NVOBJ_ENGINE_ADD(dev, MPEG, &pmpeg->base);
 	NVOBJ_CLASS(dev, 0x3174, MPEG);
-	NVOBJ_MTHD (dev, 0x3174, 0x0190, nv40_mpeg_mthd_dma);
-	NVOBJ_MTHD (dev, 0x3174, 0x01a0, nv40_mpeg_mthd_dma);
-	NVOBJ_MTHD (dev, 0x3174, 0x01b0, nv40_mpeg_mthd_dma);
+	NVOBJ_MTHD (dev, 0x3174, 0x0190, nv31_mpeg_mthd_dma);
+	NVOBJ_MTHD (dev, 0x3174, 0x01a0, nv31_mpeg_mthd_dma);
+	NVOBJ_MTHD (dev, 0x3174, 0x01b0, nv31_mpeg_mthd_dma);
 
 #if 0
 	NVOBJ_ENGINE_ADD(dev, ME, &pme->base);
diff --git a/drivers/gpu/drm/nouveau/nv40_pm.c b/drivers/gpu/drm/nouveau/nv40_pm.c
new file mode 100644
index 0000000..bbc0b9c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv40_pm.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_bios.h"
+#include "nouveau_pm.h"
+#include "nouveau_hw.h"
+
+#define min2(a,b) ((a) < (b) ? (a) : (b))
+
+static u32
+read_pll_1(struct drm_device *dev, u32 reg)
+{
+	u32 ctrl = nv_rd32(dev, reg + 0x00);
+	int P = (ctrl & 0x00070000) >> 16;
+	int N = (ctrl & 0x0000ff00) >> 8;
+	int M = (ctrl & 0x000000ff) >> 0;
+	u32 ref = 27000, clk = 0;
+
+	if (ctrl & 0x80000000)
+		clk = ref * N / M;
+
+	return clk >> P;
+}
+
+static u32
+read_pll_2(struct drm_device *dev, u32 reg)
+{
+	u32 ctrl = nv_rd32(dev, reg + 0x00);
+	u32 coef = nv_rd32(dev, reg + 0x04);
+	int N2 = (coef & 0xff000000) >> 24;
+	int M2 = (coef & 0x00ff0000) >> 16;
+	int N1 = (coef & 0x0000ff00) >> 8;
+	int M1 = (coef & 0x000000ff) >> 0;
+	int P = (ctrl & 0x00070000) >> 16;
+	u32 ref = 27000, clk = 0;
+
+	if (ctrl & 0x80000000)
+		clk = ref * N1 / M1;
+
+	if (!(ctrl & 0x00000100)) {
+		if (ctrl & 0x40000000)
+			clk = clk * N2 / M2;
+	}
+
+	return clk >> P;
+}
+
+static u32
+read_clk(struct drm_device *dev, u32 src)
+{
+	switch (src) {
+	case 3:
+		return read_pll_2(dev, 0x004000);
+	case 2:
+		return read_pll_1(dev, 0x004008);
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int
+nv40_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
+{
+	u32 ctrl = nv_rd32(dev, 0x00c040);
+
+	perflvl->core   = read_clk(dev, (ctrl & 0x00000003) >> 0);
+	perflvl->shader = read_clk(dev, (ctrl & 0x00000030) >> 4);
+	perflvl->memory = read_pll_2(dev, 0x4020);
+	return 0;
+}
+
+struct nv40_pm_state {
+	u32 ctrl;
+	u32 npll_ctrl;
+	u32 npll_coef;
+	u32 spll;
+	u32 mpll_ctrl;
+	u32 mpll_coef;
+};
+
+static int
+nv40_calc_pll(struct drm_device *dev, u32 reg, struct pll_lims *pll,
+	      u32 clk, int *N1, int *M1, int *N2, int *M2, int *log2P)
+{
+	struct nouveau_pll_vals coef;
+	int ret;
+
+	ret = get_pll_limits(dev, reg, pll);
+	if (ret)
+		return ret;
+
+	if (clk < pll->vco1.maxfreq)
+		pll->vco2.maxfreq = 0;
+
+	ret = nouveau_calc_pll_mnp(dev, pll, clk, &coef);
+	if (ret == 0)
+		return -ERANGE;
+
+	*N1 = coef.N1;
+	*M1 = coef.M1;
+	if (N2 && M2) {
+		if (pll->vco2.maxfreq) {
+			*N2 = coef.N2;
+			*M2 = coef.M2;
+		} else {
+			*N2 = 1;
+			*M2 = 1;
+		}
+	}
+	*log2P = coef.log2P;
+	return 0;
+}
+
+void *
+nv40_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
+{
+	struct nv40_pm_state *info;
+	struct pll_lims pll;
+	int N1, N2, M1, M2, log2P;
+	int ret;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	/* core/geometric clock */
+	ret = nv40_calc_pll(dev, 0x004000, &pll, perflvl->core,
+			    &N1, &M1, &N2, &M2, &log2P);
+	if (ret < 0)
+		goto out;
+
+	if (N2 == M2) {
+		info->npll_ctrl = 0x80000100 | (log2P << 16);
+		info->npll_coef = (N1 << 8) | M1;
+	} else {
+		info->npll_ctrl = 0xc0000000 | (log2P << 16);
+		info->npll_coef = (N2 << 24) | (M2 << 16) | (N1 << 8) | M1;
+	}
+
+	/* use the second PLL for shader/rop clock, if it differs from core */
+	if (perflvl->shader && perflvl->shader != perflvl->core) {
+		ret = nv40_calc_pll(dev, 0x004008, &pll, perflvl->shader,
+				    &N1, &M1, NULL, NULL, &log2P);
+		if (ret < 0)
+			goto out;
+
+		info->spll = 0xc0000000 | (log2P << 16) | (N1 << 8) | M1;
+		info->ctrl = 0x00000223;
+	} else {
+		info->spll = 0x00000000;
+		info->ctrl = 0x00000333;
+	}
+
+	/* memory clock */
+	ret = nv40_calc_pll(dev, 0x004020, &pll, perflvl->memory,
+			    &N1, &M1, &N2, &M2, &log2P);
+	if (ret < 0)
+		goto out;
+
+	info->mpll_ctrl  = 0x80000000 | (log2P << 16);
+	info->mpll_ctrl |= min2(pll.log2p_bias + log2P, pll.max_log2p) << 20;
+	if (N2 == M2) {
+		info->mpll_ctrl |= 0x00000100;
+		info->mpll_coef  = (N1 << 8) | M1;
+	} else {
+		info->mpll_ctrl |= 0x40000000;
+		info->mpll_coef  = (N2 << 24) | (M2 << 16) | (N1 << 8) | M1;
+	}
+
+out:
+	if (ret < 0) {
+		kfree(info);
+		info = ERR_PTR(ret);
+	}
+	return info;
+}
+
+static bool
+nv40_pm_gr_idle(void *data)
+{
+	struct drm_device *dev = data;
+
+	if ((nv_rd32(dev, 0x400760) & 0x000000f0) >> 4 !=
+	    (nv_rd32(dev, 0x400760) & 0x0000000f))
+		return false;
+
+	if (nv_rd32(dev, 0x400700))
+		return false;
+
+	return true;
+}
+
+void
+nv40_pm_clocks_set(struct drm_device *dev, void *pre_state)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv40_pm_state *info = pre_state;
+	unsigned long flags;
+	struct bit_entry M;
+	u32 crtc_mask = 0;
+	u8 sr1[2];
+	int i;
+
+	/* determine which CRTCs are active, fetch VGA_SR1 for each */
+	for (i = 0; i < 2; i++) {
+		u32 vbl = nv_rd32(dev, 0x600808 + (i * 0x2000));
+		u32 cnt = 0;
+		do {
+			if (vbl != nv_rd32(dev, 0x600808 + (i * 0x2000))) {
+				nv_wr08(dev, 0x0c03c4 + (i * 0x2000), 0x01);
+				sr1[i] = nv_rd08(dev, 0x0c03c5 + (i * 0x2000));
+				if (!(sr1[i] & 0x20))
+					crtc_mask |= (1 << i);
+				break;
+			}
+			udelay(1);
+		} while (cnt++ < 32);
+	}
+
+	/* halt and idle engines */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, 0x002500, 0x00000001, 0x00000000);
+	if (!nv_wait(dev, 0x002500, 0x00000010, 0x00000000))
+		goto resume;
+	nv_mask(dev, 0x003220, 0x00000001, 0x00000000);
+	if (!nv_wait(dev, 0x003220, 0x00000010, 0x00000000))
+		goto resume;
+	nv_mask(dev, 0x003200, 0x00000001, 0x00000000);
+	nv04_fifo_cache_pull(dev, false);
+
+	if (!nv_wait_cb(dev, nv40_pm_gr_idle, dev))
+		goto resume;
+
+	/* set engine clocks */
+	nv_mask(dev, 0x00c040, 0x00000333, 0x00000000);
+	nv_wr32(dev, 0x004004, info->npll_coef);
+	nv_mask(dev, 0x004000, 0xc0070100, info->npll_ctrl);
+	nv_mask(dev, 0x004008, 0xc007ffff, info->spll);
+	mdelay(5);
+	nv_mask(dev, 0x00c040, 0x00000333, info->ctrl);
+
+	/* wait for vblank start on active crtcs, disable memory access */
+	for (i = 0; i < 2; i++) {
+		if (!(crtc_mask & (1 << i)))
+			continue;
+		nv_wait(dev, 0x600808 + (i * 0x2000), 0x00010000, 0x00000000);
+		nv_wait(dev, 0x600808 + (i * 0x2000), 0x00010000, 0x00010000);
+		nv_wr08(dev, 0x0c03c4 + (i * 0x2000), 0x01);
+		nv_wr08(dev, 0x0c03c5 + (i * 0x2000), sr1[i] | 0x20);
+	}
+
+	/* prepare ram for reclocking */
+	nv_wr32(dev, 0x1002d4, 0x00000001); /* precharge */
+	nv_wr32(dev, 0x1002d0, 0x00000001); /* refresh */
+	nv_wr32(dev, 0x1002d0, 0x00000001); /* refresh */
+	nv_mask(dev, 0x100210, 0x80000000, 0x00000000); /* no auto refresh */
+	nv_wr32(dev, 0x1002dc, 0x00000001); /* enable self-refresh */
+
+	/* change the PLL of each memory partition */
+	nv_mask(dev, 0x00c040, 0x0000c000, 0x00000000);
+	switch (dev_priv->chipset) {
+	case 0x40:
+	case 0x45:
+	case 0x41:
+	case 0x42:
+	case 0x47:
+		nv_mask(dev, 0x004044, 0xc0771100, info->mpll_ctrl);
+		nv_mask(dev, 0x00402c, 0xc0771100, info->mpll_ctrl);
+		nv_wr32(dev, 0x004048, info->mpll_coef);
+		nv_wr32(dev, 0x004030, info->mpll_coef);
+	case 0x43:
+	case 0x49:
+	case 0x4b:
+		nv_mask(dev, 0x004038, 0xc0771100, info->mpll_ctrl);
+		nv_wr32(dev, 0x00403c, info->mpll_coef);
+	default:
+		nv_mask(dev, 0x004020, 0xc0771100, info->mpll_ctrl);
+		nv_wr32(dev, 0x004024, info->mpll_coef);
+		break;
+	}
+	udelay(100);
+	nv_mask(dev, 0x00c040, 0x0000c000, 0x0000c000);
+
+	/* re-enable normal operation of memory controller */
+	nv_wr32(dev, 0x1002dc, 0x00000000);
+	nv_mask(dev, 0x100210, 0x80000000, 0x80000000);
+	udelay(100);
+
+	/* execute memory reset script from vbios */
+	if (!bit_table(dev, 'M', &M))
+		nouveau_bios_init_exec(dev, ROM16(M.data[0]));
+
+	/* make sure we're in vblank (hopefully the same one as before), and
+	 * then re-enable crtc memory access
+	 */
+	for (i = 0; i < 2; i++) {
+		if (!(crtc_mask & (1 << i)))
+			continue;
+		nv_wait(dev, 0x600808 + (i * 0x2000), 0x00010000, 0x00010000);
+		nv_wr08(dev, 0x0c03c4 + (i * 0x2000), 0x01);
+		nv_wr08(dev, 0x0c03c5 + (i * 0x2000), sr1[i]);
+	}
+
+	/* resume engines */
+resume:
+	nv_wr32(dev, 0x003250, 0x00000001);
+	nv_mask(dev, 0x003220, 0x00000001, 0x00000001);
+	nv_wr32(dev, 0x003200, 0x00000001);
+	nv_wr32(dev, 0x002500, 0x00000001);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	kfree(info);
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index 5d98907..882080e 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -329,8 +329,6 @@
 
 	drm_crtc_cleanup(&nv_crtc->base);
 
-	nv50_cursor_fini(nv_crtc);
-
 	nouveau_bo_unmap(nv_crtc->lut.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
 	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
diff --git a/drivers/gpu/drm/nouveau/nv50_cursor.c b/drivers/gpu/drm/nouveau/nv50_cursor.c
index 9752c35..adfc9b6 100644
--- a/drivers/gpu/drm/nouveau/nv50_cursor.c
+++ b/drivers/gpu/drm/nouveau/nv50_cursor.c
@@ -137,21 +137,3 @@
 	nv_crtc->cursor.show = nv50_cursor_show;
 	return 0;
 }
-
-void
-nv50_cursor_fini(struct nouveau_crtc *nv_crtc)
-{
-	struct drm_device *dev = nv_crtc->base.dev;
-	int idx = nv_crtc->index;
-
-	NV_DEBUG_KMS(dev, "\n");
-
-	nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx), 0);
-	if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx),
-		     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) {
-		NV_ERROR(dev, "timeout: CURSOR_CTRL2_STATUS == 0\n");
-		NV_ERROR(dev, "CURSOR_CTRL2 = 0x%08x\n",
-			 nv_rd32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx)));
-	}
-}
-
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index db1a5f4..d23ca00 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -247,6 +247,16 @@
 		}
 	}
 
+	for (i = 0; i < 2; i++) {
+		nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), 0);
+		if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
+			     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) {
+			NV_ERROR(dev, "timeout: CURSOR_CTRL2_STATUS == 0\n");
+			NV_ERROR(dev, "CURSOR_CTRL2 = 0x%08x\n",
+				 nv_rd32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i)));
+		}
+	}
+
 	nv50_evo_fini(dev);
 
 	for (i = 0; i < 3; i++) {
@@ -286,23 +296,6 @@
 		return -ENOMEM;
 	dev_priv->engine.display.priv = priv;
 
-	/* init basic kernel modesetting */
-	drm_mode_config_init(dev);
-
-	/* Initialise some optional connector properties. */
-	drm_mode_create_scaling_mode_property(dev);
-	drm_mode_create_dithering_property(dev);
-
-	dev->mode_config.min_width = 0;
-	dev->mode_config.min_height = 0;
-
-	dev->mode_config.funcs = (void *)&nouveau_mode_config_funcs;
-
-	dev->mode_config.max_width = 8192;
-	dev->mode_config.max_height = 8192;
-
-	dev->mode_config.fb_base = dev_priv->fb_phys;
-
 	/* Create CRTC objects */
 	for (i = 0; i < 2; i++)
 		nv50_crtc_create(dev, i);
@@ -364,8 +357,6 @@
 
 	NV_DEBUG_KMS(dev, "\n");
 
-	drm_mode_config_cleanup(dev);
-
 	nv50_display_disable(dev);
 	nouveau_irq_unregister(dev, 26);
 	kfree(disp);
@@ -698,7 +689,7 @@
 		struct dcb_entry *dcb = &dev_priv->vbios.dcb.entry[i];
 
 		if (dcb->type == type && (dcb->or & (1 << or))) {
-			nouveau_bios_run_display_table(dev, dcb, 0, -1);
+			nouveau_bios_run_display_table(dev, 0, -1, dcb, -1);
 			disp->irq.dcb = dcb;
 			goto ack;
 		}
@@ -711,37 +702,6 @@
 }
 
 static void
-nv50_display_unk20_dp_hack(struct drm_device *dev, struct dcb_entry *dcb)
-{
-	int or = ffs(dcb->or) - 1, link = !(dcb->dpconf.sor.link & 1);
-	struct drm_encoder *encoder;
-	uint32_t tmp, unk0 = 0, unk1 = 0;
-
-	if (dcb->type != OUTPUT_DP)
-		return;
-
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-
-		if (nv_encoder->dcb == dcb) {
-			unk0 = nv_encoder->dp.unk0;
-			unk1 = nv_encoder->dp.unk1;
-			break;
-		}
-	}
-
-	if (unk0 || unk1) {
-		tmp  = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
-		tmp &= 0xfffffe03;
-		nv_wr32(dev, NV50_SOR_DP_CTRL(or, link), tmp | unk0);
-
-		tmp  = nv_rd32(dev, NV50_SOR_DP_UNK128(or, link));
-		tmp &= 0xfef080c0;
-		nv_wr32(dev, NV50_SOR_DP_UNK128(or, link), tmp | unk1);
-	}
-}
-
-static void
 nv50_display_unk20_handler(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -753,7 +713,7 @@
 	NV_DEBUG_KMS(dev, "0x610030: 0x%08x\n", unk30);
 	dcb = disp->irq.dcb;
 	if (dcb) {
-		nouveau_bios_run_display_table(dev, dcb, 0, -2);
+		nouveau_bios_run_display_table(dev, 0, -2, dcb, -1);
 		disp->irq.dcb = NULL;
 	}
 
@@ -837,9 +797,15 @@
 	}
 
 	script = nv50_display_script_select(dev, dcb, mc, pclk);
-	nouveau_bios_run_display_table(dev, dcb, script, pclk);
+	nouveau_bios_run_display_table(dev, script, pclk, dcb, -1);
 
-	nv50_display_unk20_dp_hack(dev, dcb);
+	if (type == OUTPUT_DP) {
+		int link = !(dcb->dpconf.sor.link & 1);
+		if ((mc & 0x000f0000) == 0x00020000)
+			nouveau_dp_tu_update(dev, or, link, pclk, 18);
+		else
+			nouveau_dp_tu_update(dev, or, link, pclk, 24);
+	}
 
 	if (dcb->type != OUTPUT_ANALOG) {
 		tmp = nv_rd32(dev, NV50_PDISPLAY_SOR_CLK_CTRL2(or));
@@ -904,7 +870,7 @@
 	if (!dcb)
 		goto ack;
 
-	nouveau_bios_run_display_table(dev, dcb, script, -pclk);
+	nouveau_bios_run_display_table(dev, script, -pclk, dcb, -1);
 	nv50_display_unk40_dp_set_tmds(dev, dcb);
 
 ack:
diff --git a/drivers/gpu/drm/nouveau/nv50_gpio.c b/drivers/gpu/drm/nouveau/nv50_gpio.c
index d4f4206..793a5cc 100644
--- a/drivers/gpu/drm/nouveau/nv50_gpio.c
+++ b/drivers/gpu/drm/nouveau/nv50_gpio.c
@@ -98,6 +98,37 @@
 }
 
 int
+nvd0_gpio_get(struct drm_device *dev, enum dcb_gpio_tag tag)
+{
+	struct dcb_gpio_entry *gpio;
+	u32 v;
+
+	gpio = nouveau_bios_gpio_entry(dev, tag);
+	if (!gpio)
+		return -ENOENT;
+
+	v  = nv_rd32(dev, 0x00d610 + (gpio->line * 4));
+	v &= 0x00004000;
+	return (!!v == (gpio->state[1] & 1));
+}
+
+int
+nvd0_gpio_set(struct drm_device *dev, enum dcb_gpio_tag tag, int state)
+{
+	struct dcb_gpio_entry *gpio;
+	u32 v;
+
+	gpio = nouveau_bios_gpio_entry(dev, tag);
+	if (!gpio)
+		return -ENOENT;
+
+	v = gpio->state[state] ^ 2;
+
+	nv_mask(dev, 0x00d610 + (gpio->line * 4), 0x00003000, v << 12);
+	return 0;
+}
+
+int
 nv50_gpio_irq_register(struct drm_device *dev, enum dcb_gpio_tag tag,
 		       void (*handler)(void *, int), void *data)
 {
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index d43c46c..8c979b3 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -120,70 +120,62 @@
 	return 0;
 }
 
-static void
-nv50_graph_init_reset(struct drm_device *dev)
-{
-	uint32_t pmc_e = NV_PMC_ENABLE_PGRAPH | (1 << 21);
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) & ~pmc_e);
-	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |  pmc_e);
-}
-
-static void
-nv50_graph_init_intr(struct drm_device *dev)
-{
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, NV03_PGRAPH_INTR, 0xffffffff);
-	nv_wr32(dev, 0x400138, 0xffffffff);
-	nv_wr32(dev, NV40_PGRAPH_INTR_EN, 0xffffffff);
-}
-
-static void
-nv50_graph_init_regs__nv(struct drm_device *dev)
+static int
+nv50_graph_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t units = nv_rd32(dev, 0x1540);
+	struct nv50_graph_engine *pgraph = nv_engine(dev, engine);
+	u32 units = nv_rd32(dev, 0x001540);
 	int i;
 
 	NV_DEBUG(dev, "\n");
 
+	/* master reset */
+	nv_mask(dev, 0x000200, 0x00200100, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00200100, 0x00200100);
+	nv_wr32(dev, 0x40008c, 0x00000004); /* HW_CTX_SWITCH_ENABLED */
+
+	/* reset/enable traps and interrupts */
 	nv_wr32(dev, 0x400804, 0xc0000000);
 	nv_wr32(dev, 0x406800, 0xc0000000);
 	nv_wr32(dev, 0x400c04, 0xc0000000);
 	nv_wr32(dev, 0x401800, 0xc0000000);
 	nv_wr32(dev, 0x405018, 0xc0000000);
 	nv_wr32(dev, 0x402000, 0xc0000000);
-
 	for (i = 0; i < 16; i++) {
-		if (units & 1 << i) {
-			if (dev_priv->chipset < 0xa0) {
-				nv_wr32(dev, 0x408900 + (i << 12), 0xc0000000);
-				nv_wr32(dev, 0x408e08 + (i << 12), 0xc0000000);
-				nv_wr32(dev, 0x408314 + (i << 12), 0xc0000000);
-			} else {
-				nv_wr32(dev, 0x408600 + (i << 11), 0xc0000000);
-				nv_wr32(dev, 0x408708 + (i << 11), 0xc0000000);
-				nv_wr32(dev, 0x40831c + (i << 11), 0xc0000000);
-			}
+		if (!(units & (1 << i)))
+			continue;
+
+		if (dev_priv->chipset < 0xa0) {
+			nv_wr32(dev, 0x408900 + (i << 12), 0xc0000000);
+			nv_wr32(dev, 0x408e08 + (i << 12), 0xc0000000);
+			nv_wr32(dev, 0x408314 + (i << 12), 0xc0000000);
+		} else {
+			nv_wr32(dev, 0x408600 + (i << 11), 0xc0000000);
+			nv_wr32(dev, 0x408708 + (i << 11), 0xc0000000);
+			nv_wr32(dev, 0x40831c + (i << 11), 0xc0000000);
 		}
 	}
 
 	nv_wr32(dev, 0x400108, 0xffffffff);
-
-	nv_wr32(dev, 0x400824, 0x00004000);
+	nv_wr32(dev, 0x400138, 0xffffffff);
+	nv_wr32(dev, 0x400100, 0xffffffff);
+	nv_wr32(dev, 0x40013c, 0xffffffff);
 	nv_wr32(dev, 0x400500, 0x00010001);
-}
 
-static void
-nv50_graph_init_zcull(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int i;
+	/* upload context program, initialise ctxctl defaults */
+	nv_wr32(dev, 0x400324, 0x00000000);
+	for (i = 0; i < pgraph->ctxprog_size; i++)
+		nv_wr32(dev, 0x400328, pgraph->ctxprog[i]);
+	nv_wr32(dev, 0x400824, 0x00000000);
+	nv_wr32(dev, 0x400828, 0x00000000);
+	nv_wr32(dev, 0x40082c, 0x00000000);
+	nv_wr32(dev, 0x400830, 0x00000000);
+	nv_wr32(dev, 0x400724, 0x00000000);
+	nv_wr32(dev, 0x40032c, 0x00000000);
+	nv_wr32(dev, 0x400320, 4);	/* CTXCTL_CMD = NEWCTXDMA */
 
-	NV_DEBUG(dev, "\n");
-
+	/* some unknown zcull magic */
 	switch (dev_priv->chipset & 0xf0) {
 	case 0x50:
 	case 0x80:
@@ -212,43 +204,7 @@
 		nv_wr32(dev, 0x402c28 + (i * 8), 0x00000000);
 		nv_wr32(dev, 0x402c2c + (i * 8), 0x00000000);
 	}
-}
 
-static int
-nv50_graph_init_ctxctl(struct drm_device *dev)
-{
-	struct nv50_graph_engine *pgraph = nv_engine(dev, NVOBJ_ENGINE_GR);
-	int i;
-
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
-	for (i = 0; i < pgraph->ctxprog_size; i++)
-		nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, pgraph->ctxprog[i]);
-
-	nv_wr32(dev, 0x40008c, 0x00000004); /* HW_CTX_SWITCH_ENABLED */
-	nv_wr32(dev, 0x400320, 4);
-	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0);
-	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, 0);
-	return 0;
-}
-
-static int
-nv50_graph_init(struct drm_device *dev, int engine)
-{
-	int ret;
-
-	NV_DEBUG(dev, "\n");
-
-	nv50_graph_init_reset(dev);
-	nv50_graph_init_regs__nv(dev);
-	nv50_graph_init_zcull(dev);
-
-	ret = nv50_graph_init_ctxctl(dev);
-	if (ret)
-		return ret;
-
-	nv50_graph_init_intr(dev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c
index de9abff..d05c2c3 100644
--- a/drivers/gpu/drm/nouveau/nv50_grctx.c
+++ b/drivers/gpu/drm/nouveau/nv50_grctx.c
@@ -40,6 +40,12 @@
 #define CP_FLAG_UNK0B                 ((0 * 32) + 0xb)
 #define CP_FLAG_UNK0B_CLEAR           0
 #define CP_FLAG_UNK0B_SET             1
+#define CP_FLAG_XFER_SWITCH           ((0 * 32) + 0xe)
+#define CP_FLAG_XFER_SWITCH_DISABLE   0
+#define CP_FLAG_XFER_SWITCH_ENABLE    1
+#define CP_FLAG_STATE                 ((0 * 32) + 0x1c)
+#define CP_FLAG_STATE_STOPPED         0
+#define CP_FLAG_STATE_RUNNING         1
 #define CP_FLAG_UNK1D                 ((0 * 32) + 0x1d)
 #define CP_FLAG_UNK1D_CLEAR           0
 #define CP_FLAG_UNK1D_SET             1
@@ -194,6 +200,9 @@
 				   "the devs.\n");
 		return -ENOSYS;
 	}
+
+	cp_set (ctx, STATE, RUNNING);
+	cp_set (ctx, XFER_SWITCH, ENABLE);
 	/* decide whether we're loading/unloading the context */
 	cp_bra (ctx, AUTO_SAVE, PENDING, cp_setup_save);
 	cp_bra (ctx, USER_SAVE, PENDING, cp_setup_save);
@@ -260,6 +269,8 @@
 	cp_name(ctx, cp_exit);
 	cp_set (ctx, USER_SAVE, NOT_PENDING);
 	cp_set (ctx, USER_LOAD, NOT_PENDING);
+	cp_set (ctx, XFER_SWITCH, DISABLE);
+	cp_set (ctx, STATE, STOPPED);
 	cp_out (ctx, CP_END);
 	ctx->ctxvals_pos += 0x400; /* padding... no idea why you need it */
 
diff --git a/drivers/gpu/drm/nouveau/nv50_pm.c b/drivers/gpu/drm/nouveau/nv50_pm.c
index 8a28100..3d5a86b 100644
--- a/drivers/gpu/drm/nouveau/nv50_pm.c
+++ b/drivers/gpu/drm/nouveau/nv50_pm.c
@@ -115,15 +115,15 @@
 	    BIT_M.version == 1 && BIT_M.length >= 0x0b) {
 		script = ROM16(BIT_M.data[0x05]);
 		if (script)
-			nouveau_bios_run_init_table(dev, script, NULL);
+			nouveau_bios_run_init_table(dev, script, NULL, -1);
 		script = ROM16(BIT_M.data[0x07]);
 		if (script)
-			nouveau_bios_run_init_table(dev, script, NULL);
+			nouveau_bios_run_init_table(dev, script, NULL, -1);
 		script = ROM16(BIT_M.data[0x09]);
 		if (script)
-			nouveau_bios_run_init_table(dev, script, NULL);
+			nouveau_bios_run_init_table(dev, script, NULL, -1);
 
-		nouveau_bios_run_init_table(dev, perflvl->memscript, NULL);
+		nouveau_bios_run_init_table(dev, perflvl->memscript, NULL, -1);
 	}
 
 	if (state->type == PLL_MEMORY) {
diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c
index ffe8b48..2633aa8 100644
--- a/drivers/gpu/drm/nouveau/nv50_sor.c
+++ b/drivers/gpu/drm/nouveau/nv50_sor.c
@@ -124,7 +124,7 @@
 		if (mode == DRM_MODE_DPMS_ON) {
 			u8 status = DP_SET_POWER_D0;
 			nouveau_dp_auxch(auxch, 8, DP_SET_POWER, &status, 1);
-			nouveau_dp_link_train(encoder);
+			nouveau_dp_link_train(encoder, nv_encoder->dp.datarate);
 		} else {
 			u8 status = DP_SET_POWER_D3;
 			nouveau_dp_auxch(auxch, 8, DP_SET_POWER, &status, 1);
@@ -187,14 +187,13 @@
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_crtc *crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector;
 	uint32_t mode_ctl = 0;
 	int ret;
 
 	NV_DEBUG_KMS(dev, "or %d type %d -> crtc %d\n",
 		     nv_encoder->or, nv_encoder->dcb->type, crtc->index);
 
-	nv50_sor_dpms(encoder, DRM_MODE_DPMS_ON);
-
 	switch (nv_encoder->dcb->type) {
 	case OUTPUT_TMDS:
 		if (nv_encoder->dcb->sorconf.link & 1) {
@@ -206,7 +205,15 @@
 			mode_ctl = 0x0200;
 		break;
 	case OUTPUT_DP:
-		mode_ctl |= (nv_encoder->dp.mc_unknown << 16);
+		nv_connector = nouveau_encoder_connector_get(nv_encoder);
+		if (nv_connector && nv_connector->base.display_info.bpc == 6) {
+			nv_encoder->dp.datarate = crtc->mode->clock * 18 / 8;
+			mode_ctl |= 0x00020000;
+		} else {
+			nv_encoder->dp.datarate = crtc->mode->clock * 24 / 8;
+			mode_ctl |= 0x00050000;
+		}
+
 		if (nv_encoder->dcb->sorconf.link & 1)
 			mode_ctl |= 0x00000800;
 		else
@@ -227,6 +234,8 @@
 	if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
 		mode_ctl |= NV50_EVO_SOR_MODE_CTRL_NVSYNC;
 
+	nv50_sor_dpms(encoder, DRM_MODE_DPMS_ON);
+
 	ret = RING_SPACE(evo, 2);
 	if (ret) {
 		NV_ERROR(dev, "no space while connecting SOR\n");
@@ -313,31 +322,6 @@
 	encoder->possible_crtcs = entry->heads;
 	encoder->possible_clones = 0;
 
-	if (nv_encoder->dcb->type == OUTPUT_DP) {
-		int or = nv_encoder->or, link = !(entry->dpconf.sor.link & 1);
-		uint32_t tmp;
-
-		tmp = nv_rd32(dev, 0x61c700 + (or * 0x800));
-		if (!tmp)
-			tmp = nv_rd32(dev, 0x610798 + (or * 8));
-
-		switch ((tmp & 0x00000f00) >> 8) {
-		case 8:
-		case 9:
-			nv_encoder->dp.mc_unknown = (tmp & 0x000f0000) >> 16;
-			tmp = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
-			nv_encoder->dp.unk0 = tmp & 0x000001fc;
-			tmp = nv_rd32(dev, NV50_SOR_DP_UNK128(or, link));
-			nv_encoder->dp.unk1 = tmp & 0x010f7f3f;
-			break;
-		default:
-			break;
-		}
-
-		if (!nv_encoder->dp.mc_unknown)
-			nv_encoder->dp.mc_unknown = 5;
-	}
-
 	drm_mode_connector_attach_encoder(connector, encoder);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_vram.c b/drivers/gpu/drm/nouveau/nv50_vram.c
index af32dae..9da2383 100644
--- a/drivers/gpu/drm/nouveau/nv50_vram.c
+++ b/drivers/gpu/drm/nouveau/nv50_vram.c
@@ -51,7 +51,7 @@
 nv50_vram_del(struct drm_device *dev, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_mm *mm = dev_priv->engine.vram.mm;
+	struct nouveau_mm *mm = &dev_priv->engine.vram.mm;
 	struct nouveau_mm_node *this;
 	struct nouveau_mem *mem;
 
@@ -82,7 +82,7 @@
 	      u32 memtype, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_mm *mm = dev_priv->engine.vram.mm;
+	struct nouveau_mm *mm = &dev_priv->engine.vram.mm;
 	struct nouveau_mm_node *r;
 	struct nouveau_mem *mem;
 	int comp = (memtype & 0x300) >> 8;
diff --git a/drivers/gpu/drm/nouveau/nva3_pm.c b/drivers/gpu/drm/nouveau/nva3_pm.c
index e4b2b9e..618c144 100644
--- a/drivers/gpu/drm/nouveau/nva3_pm.c
+++ b/drivers/gpu/drm/nouveau/nva3_pm.c
@@ -27,178 +27,316 @@
 #include "nouveau_bios.h"
 #include "nouveau_pm.h"
 
-/* This is actually a lot more complex than it appears here, but hopefully
- * this should be able to deal with what the VBIOS leaves for us..
- *
- * If not, well, I'll jump off that bridge when I come to it.
- */
+static u32 read_clk(struct drm_device *, int, bool);
+static u32 read_pll(struct drm_device *, int, u32);
 
-struct nva3_pm_state {
-	enum pll_types type;
-	u32 src0;
-	u32 src1;
-	u32 ctrl;
-	u32 coef;
-	u32 old_pnm;
-	u32 new_pnm;
-	u32 new_div;
+static u32
+read_vco(struct drm_device *dev, int clk)
+{
+	u32 sctl = nv_rd32(dev, 0x4120 + (clk * 4));
+	if ((sctl & 0x00000030) != 0x00000030)
+		return read_pll(dev, 0x41, 0x00e820);
+	return read_pll(dev, 0x42, 0x00e8a0);
+}
+
+static u32
+read_clk(struct drm_device *dev, int clk, bool ignore_en)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	u32 sctl, sdiv, sclk;
+
+	/* refclk for the 0xe8xx plls is a fixed frequency */
+	if (clk >= 0x40) {
+		if (dev_priv->chipset == 0xaf) {
+			/* no joke.. seriously.. sigh.. */
+			return nv_rd32(dev, 0x00471c) * 1000;
+		}
+
+		return dev_priv->crystal;
+	}
+
+	sctl = nv_rd32(dev, 0x4120 + (clk * 4));
+	if (!ignore_en && !(sctl & 0x00000100))
+		return 0;
+
+	switch (sctl & 0x00003000) {
+	case 0x00000000:
+		return dev_priv->crystal;
+	case 0x00002000:
+		if (sctl & 0x00000040)
+			return 108000;
+		return 100000;
+	case 0x00003000:
+		sclk = read_vco(dev, clk);
+		sdiv = ((sctl & 0x003f0000) >> 16) + 2;
+		return (sclk * 2) / sdiv;
+	default:
+		return 0;
+	}
+}
+
+static u32
+read_pll(struct drm_device *dev, int clk, u32 pll)
+{
+	u32 ctrl = nv_rd32(dev, pll + 0);
+	u32 sclk = 0, P = 1, N = 1, M = 1;
+
+	if (!(ctrl & 0x00000008)) {
+		if (ctrl & 0x00000001) {
+			u32 coef = nv_rd32(dev, pll + 4);
+			M = (coef & 0x000000ff) >> 0;
+			N = (coef & 0x0000ff00) >> 8;
+			P = (coef & 0x003f0000) >> 16;
+
+			/* no post-divider on these.. */
+			if ((pll & 0x00ff00) == 0x00e800)
+				P = 1;
+
+			sclk = read_clk(dev, 0x00 + clk, false);
+		}
+	} else {
+		sclk = read_clk(dev, 0x10 + clk, false);
+	}
+
+	return sclk * N / (M * P);
+}
+
+struct creg {
+	u32 clk;
+	u32 pll;
 };
 
 static int
-nva3_pm_pll_offset(u32 id)
+calc_clk(struct drm_device *dev, int clk, u32 pll, u32 khz, struct creg *reg)
 {
-	static const u32 pll_map[] = {
-		0x00, PLL_CORE,
-		0x01, PLL_SHADER,
-		0x02, PLL_MEMORY,
-		0x00, 0x00
-	};
-	const u32 *map = pll_map;
+	struct pll_lims limits;
+	u32 oclk, sclk, sdiv;
+	int P, N, M, diff;
+	int ret;
 
-	while (map[1]) {
-		if (id == map[1])
-			return map[0];
-		map += 2;
+	reg->pll = 0;
+	reg->clk = 0;
+	if (!khz) {
+		NV_DEBUG(dev, "no clock for 0x%04x/0x%02x\n", pll, clk);
+		return 0;
 	}
 
-	return -ENOENT;
-}
+	switch (khz) {
+	case 27000:
+		reg->clk = 0x00000100;
+		return khz;
+	case 100000:
+		reg->clk = 0x00002100;
+		return khz;
+	case 108000:
+		reg->clk = 0x00002140;
+		return khz;
+	default:
+		sclk = read_vco(dev, clk);
+		sdiv = min((sclk * 2) / (khz - 2999), (u32)65);
+		/* if the clock has a PLL attached, and we can get a within
+		 * [-2, 3) MHz of a divider, we'll disable the PLL and use
+		 * the divider instead.
+		 *
+		 * divider can go as low as 2, limited here because NVIDIA
+		 * and the VBIOS on my NVA8 seem to prefer using the PLL
+		 * for 810MHz - is there a good reason?
+		 */
+		if (sdiv > 4) {
+			oclk = (sclk * 2) / sdiv;
+			diff = khz - oclk;
+			if (!pll || (diff >= -2000 && diff < 3000)) {
+				reg->clk = (((sdiv - 2) << 16) | 0x00003100);
+				return oclk;
+			}
+		}
 
-int
-nva3_pm_clock_get(struct drm_device *dev, u32 id)
-{
-	u32 src0, src1, ctrl, coef;
-	struct pll_lims pll;
-	int ret, off;
-	int P, N, M;
+		if (!pll) {
+			NV_ERROR(dev, "bad freq %02x: %d %d\n", clk, khz, sclk);
+			return -ERANGE;
+		}
 
-	ret = get_pll_limits(dev, id, &pll);
+		break;
+	}
+
+	ret = get_pll_limits(dev, pll, &limits);
 	if (ret)
 		return ret;
 
-	off = nva3_pm_pll_offset(id);
-	if (off < 0)
-		return off;
+	limits.refclk = read_clk(dev, clk - 0x10, true);
+	if (!limits.refclk)
+		return -EINVAL;
 
-	src0 = nv_rd32(dev, 0x4120 + (off * 4));
-	src1 = nv_rd32(dev, 0x4160 + (off * 4));
-	ctrl = nv_rd32(dev, pll.reg + 0);
-	coef = nv_rd32(dev, pll.reg + 4);
-	NV_DEBUG(dev, "PLL %02x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
-		      id, src0, src1, ctrl, coef);
-
-	if (ctrl & 0x00000008) {
-		u32 div = ((src1 & 0x003c0000) >> 18) + 1;
-		return (pll.refclk * 2) / div;
+	ret = nva3_calc_pll(dev, &limits, khz, &N, NULL, &M, &P);
+	if (ret >= 0) {
+		reg->clk = nv_rd32(dev, 0x4120 + (clk * 4));
+		reg->pll = (P << 16) | (N << 8) | M;
 	}
-
-	P = (coef & 0x003f0000) >> 16;
-	N = (coef & 0x0000ff00) >> 8;
-	M = (coef & 0x000000ff);
-	return pll.refclk * N / M / P;
+	return ret;
 }
 
-void *
-nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
-		  u32 id, int khz)
+static void
+prog_pll(struct drm_device *dev, int clk, u32 pll, struct creg *reg)
 {
-	struct nva3_pm_state *pll;
-	struct pll_lims limits;
-	int N, M, P, diff;
-	int ret, off;
+	const u32 src0 = 0x004120 + (clk * 4);
+	const u32 src1 = 0x004160 + (clk * 4);
+	const u32 ctrl = pll + 0;
+	const u32 coef = pll + 4;
+	u32 cntl;
 
-	ret = get_pll_limits(dev, id, &limits);
-	if (ret < 0)
-		return (ret == -ENOENT) ? NULL : ERR_PTR(ret);
-
-	off = nva3_pm_pll_offset(id);
-	if (id < 0)
-		return ERR_PTR(-EINVAL);
-
-
-	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
-	if (!pll)
-		return ERR_PTR(-ENOMEM);
-	pll->type = id;
-	pll->src0 = 0x004120 + (off * 4);
-	pll->src1 = 0x004160 + (off * 4);
-	pll->ctrl = limits.reg + 0;
-	pll->coef = limits.reg + 4;
-
-	/* If target clock is within [-2, 3) MHz of a divisor, we'll
-	 * use that instead of calculating MNP values
-	 */
-	pll->new_div = min((limits.refclk * 2) / (khz - 2999), 16);
-	if (pll->new_div) {
-		diff = khz - ((limits.refclk * 2) / pll->new_div);
-		if (diff < -2000 || diff >= 3000)
-			pll->new_div = 0;
+	if (!reg->clk && !reg->pll) {
+		NV_DEBUG(dev, "no clock for %02x\n", clk);
+		return;
 	}
 
-	if (!pll->new_div) {
-		ret = nva3_calc_pll(dev, &limits, khz, &N, NULL, &M, &P);
-		if (ret < 0)
-			return ERR_PTR(ret);
-
-		pll->new_pnm = (P << 16) | (N << 8) | M;
-		pll->new_div = 2 - 1;
+	cntl = nv_rd32(dev, ctrl) & 0xfffffff2;
+	if (reg->pll) {
+		nv_mask(dev, src0, 0x00000101, 0x00000101);
+		nv_wr32(dev, coef, reg->pll);
+		nv_wr32(dev, ctrl, cntl | 0x00000015);
+		nv_mask(dev, src1, 0x00000100, 0x00000000);
+		nv_mask(dev, src1, 0x00000001, 0x00000000);
 	} else {
-		pll->new_pnm = 0;
-		pll->new_div--;
+		nv_mask(dev, src1, 0x003f3141, 0x00000101 | reg->clk);
+		nv_wr32(dev, ctrl, cntl | 0x0000001d);
+		nv_mask(dev, ctrl, 0x00000001, 0x00000000);
+		nv_mask(dev, src0, 0x00000100, 0x00000000);
+		nv_mask(dev, src0, 0x00000001, 0x00000000);
+	}
+}
+
+static void
+prog_clk(struct drm_device *dev, int clk, struct creg *reg)
+{
+	if (!reg->clk) {
+		NV_DEBUG(dev, "no clock for %02x\n", clk);
+		return;
 	}
 
-	if ((nv_rd32(dev, pll->src1) & 0x00000101) != 0x00000101)
-		pll->old_pnm = nv_rd32(dev, pll->coef);
-	return pll;
+	nv_mask(dev, 0x004120 + (clk * 4), 0x003f3141, 0x00000101 | reg->clk);
+}
+
+int
+nva3_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
+{
+	perflvl->core   = read_pll(dev, 0x00, 0x4200);
+	perflvl->shader = read_pll(dev, 0x01, 0x4220);
+	perflvl->memory = read_pll(dev, 0x02, 0x4000);
+	perflvl->unka0  = read_clk(dev, 0x20, false);
+	perflvl->vdec   = read_clk(dev, 0x21, false);
+	perflvl->daemon = read_clk(dev, 0x25, false);
+	perflvl->copy   = perflvl->core;
+	return 0;
+}
+
+struct nva3_pm_state {
+	struct creg nclk;
+	struct creg sclk;
+	struct creg mclk;
+	struct creg vdec;
+	struct creg unka0;
+};
+
+void *
+nva3_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
+{
+	struct nva3_pm_state *info;
+	int ret;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	ret = calc_clk(dev, 0x10, 0x4200, perflvl->core, &info->nclk);
+	if (ret < 0)
+		goto out;
+
+	ret = calc_clk(dev, 0x11, 0x4220, perflvl->shader, &info->sclk);
+	if (ret < 0)
+		goto out;
+
+	ret = calc_clk(dev, 0x12, 0x4000, perflvl->memory, &info->mclk);
+	if (ret < 0)
+		goto out;
+
+	ret = calc_clk(dev, 0x20, 0x0000, perflvl->unka0, &info->unka0);
+	if (ret < 0)
+		goto out;
+
+	ret = calc_clk(dev, 0x21, 0x0000, perflvl->vdec, &info->vdec);
+	if (ret < 0)
+		goto out;
+
+out:
+	if (ret < 0) {
+		kfree(info);
+		info = ERR_PTR(ret);
+	}
+	return info;
+}
+
+static bool
+nva3_pm_grcp_idle(void *data)
+{
+	struct drm_device *dev = data;
+
+	if (!(nv_rd32(dev, 0x400304) & 0x00000001))
+		return true;
+	if (nv_rd32(dev, 0x400308) == 0x0050001c)
+		return true;
+	return false;
 }
 
 void
-nva3_pm_clock_set(struct drm_device *dev, void *pre_state)
+nva3_pm_clocks_set(struct drm_device *dev, void *pre_state)
 {
-	struct nva3_pm_state *pll = pre_state;
-	u32 ctrl = 0;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nva3_pm_state *info = pre_state;
+	unsigned long flags;
 
-	/* For the memory clock, NVIDIA will build a "script" describing
-	 * the reclocking process and ask PDAEMON to execute it.
-	 */
-	if (pll->type == PLL_MEMORY) {
+	/* prevent any new grctx switches from starting */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_wr32(dev, 0x400324, 0x00000000);
+	nv_wr32(dev, 0x400328, 0x0050001c); /* wait flag 0x1c */
+	/* wait for any pending grctx switches to complete */
+	if (!nv_wait_cb(dev, nva3_pm_grcp_idle, dev)) {
+		NV_ERROR(dev, "pm: ctxprog didn't go idle\n");
+		goto cleanup;
+	}
+	/* freeze PFIFO */
+	nv_mask(dev, 0x002504, 0x00000001, 0x00000001);
+	if (!nv_wait(dev, 0x002504, 0x00000010, 0x00000010)) {
+		NV_ERROR(dev, "pm: fifo didn't go idle\n");
+		goto cleanup;
+	}
+
+	prog_pll(dev, 0x00, 0x004200, &info->nclk);
+	prog_pll(dev, 0x01, 0x004220, &info->sclk);
+	prog_clk(dev, 0x20, &info->unka0);
+	prog_clk(dev, 0x21, &info->vdec);
+
+	if (info->mclk.clk || info->mclk.pll) {
 		nv_wr32(dev, 0x100210, 0);
 		nv_wr32(dev, 0x1002dc, 1);
 		nv_wr32(dev, 0x004018, 0x00001000);
-		ctrl = 0x18000100;
-	}
-
-	if (pll->old_pnm || !pll->new_pnm) {
-		nv_mask(dev, pll->src1, 0x003c0101, 0x00000101 |
-						    (pll->new_div << 18));
-		nv_wr32(dev, pll->ctrl, 0x0001001d | ctrl);
-		nv_mask(dev, pll->ctrl, 0x00000001, 0x00000000);
-	}
-
-	if (pll->new_pnm) {
-		nv_mask(dev, pll->src0, 0x00000101, 0x00000101);
-		nv_wr32(dev, pll->coef, pll->new_pnm);
-		nv_wr32(dev, pll->ctrl, 0x0001001d | ctrl);
-		nv_mask(dev, pll->ctrl, 0x00000010, 0x00000000);
-		nv_mask(dev, pll->ctrl, 0x00020010, 0x00020010);
-		nv_wr32(dev, pll->ctrl, 0x00010015 | ctrl);
-		nv_mask(dev, pll->src1, 0x00000100, 0x00000000);
-		nv_mask(dev, pll->src1, 0x00000001, 0x00000000);
-		if (pll->type == PLL_MEMORY)
-			nv_wr32(dev, 0x4018, 0x10005000);
-	} else {
-		nv_mask(dev, pll->ctrl, 0x00000001, 0x00000000);
-		nv_mask(dev, pll->src0, 0x00000100, 0x00000000);
-		nv_mask(dev, pll->src0, 0x00000001, 0x00000000);
-		if (pll->type == PLL_MEMORY)
-			nv_wr32(dev, 0x4018, 0x1000d000);
-	}
-
-	if (pll->type == PLL_MEMORY) {
+		prog_pll(dev, 0x02, 0x004000, &info->mclk);
+		if (nv_rd32(dev, 0x4000) & 0x00000008)
+			nv_wr32(dev, 0x004018, 0x1000d000);
+		else
+			nv_wr32(dev, 0x004018, 0x10005000);
 		nv_wr32(dev, 0x1002dc, 0);
 		nv_wr32(dev, 0x100210, 0x80000000);
 	}
 
-	kfree(pll);
+cleanup:
+	/* unfreeze PFIFO */
+	nv_mask(dev, 0x002504, 0x00000001, 0x00000000);
+	/* restore ctxprog to normal */
+	nv_wr32(dev, 0x400324, 0x00000000);
+	nv_wr32(dev, 0x400328, 0x0070009c); /* set flag 0x1c */
+	/* unblock it if necessary */
+	if (nv_rd32(dev, 0x400308) == 0x0050001c)
+		nv_mask(dev, 0x400824, 0x10000000, 0x10000000);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+	kfree(info);
 }
-
diff --git a/drivers/gpu/drm/nouveau/nvc0_fb.c b/drivers/gpu/drm/nouveau/nvc0_fb.c
index 08e6b11..5bf5503 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fb.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fb.c
@@ -32,6 +32,30 @@
 	dma_addr_t r100c10;
 };
 
+static inline void
+nvc0_mfb_subp_isr(struct drm_device *dev, int unit, int subp)
+{
+	u32 subp_base = 0x141000 + (unit * 0x2000) + (subp * 0x400);
+	u32 stat = nv_rd32(dev, subp_base + 0x020);
+
+	if (stat) {
+		NV_INFO(dev, "PMFB%d_SUBP%d: 0x%08x\n", unit, subp, stat);
+		nv_wr32(dev, subp_base + 0x020, stat);
+	}
+}
+
+static void
+nvc0_mfb_isr(struct drm_device *dev)
+{
+	u32 units = nv_rd32(dev, 0x00017c);
+	while (units) {
+		u32 subp, unit = ffs(units) - 1;
+		for (subp = 0; subp < 2; subp++)
+			nvc0_mfb_subp_isr(dev, unit, subp);
+		units &= ~(1 << unit);
+	}
+}
+
 static void
 nvc0_fb_destroy(struct drm_device *dev)
 {
@@ -39,6 +63,8 @@
 	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
 	struct nvc0_fb_priv *priv = pfb->priv;
 
+	nouveau_irq_unregister(dev, 25);
+
 	if (priv->r100c10_page) {
 		pci_unmap_page(dev->pdev, priv->r100c10, PAGE_SIZE,
 			       PCI_DMA_BIDIRECTIONAL);
@@ -74,6 +100,7 @@
 		return -EFAULT;
 	}
 
+	nouveau_irq_register(dev, 25, nvc0_mfb_isr);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c
index 6f9f341..dcbe0d5 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fifo.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
@@ -322,7 +322,7 @@
 	}
 
 	/* PSUBFIFO[n] */
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < priv->spoon_nr; i++) {
 		nv_mask(dev, 0x04013c + (i * 0x2000), 0x10000100, 0x00000000);
 		nv_wr32(dev, 0x040108 + (i * 0x2000), 0xffffffff); /* INTR */
 		nv_wr32(dev, 0x04010c + (i * 0x2000), 0xfffffeff); /* INTR_EN */
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.c b/drivers/gpu/drm/nouveau/nvc0_graph.c
index 5b2f6f4..4b8d0b3 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.c
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.c
@@ -390,7 +390,7 @@
 	}
 
 	nv_wr32(dev, GPC_BCAST(0x1bd4), magicgpc918);
-	nv_wr32(dev, GPC_BCAST(0x08ac), priv->rop_nr);
+	nv_wr32(dev, GPC_BCAST(0x08ac), nv_rd32(dev, 0x100800));
 }
 
 static void
@@ -700,22 +700,6 @@
 	nv_wr32(dev, 0x400500, 0x00010001);
 }
 
-static void
-nvc0_runk140_isr(struct drm_device *dev)
-{
-	u32 units = nv_rd32(dev, 0x00017c) & 0x1f;
-
-	while (units) {
-		u32 unit = ffs(units) - 1;
-		u32 reg = 0x140000 + unit * 0x2000;
-		u32 st0 = nv_mask(dev, reg + 0x1020, 0, 0);
-		u32 st1 = nv_mask(dev, reg + 0x1420, 0, 0);
-
-		NV_DEBUG(dev, "PRUNK140: %d 0x%08x 0x%08x\n", unit, st0, st1);
-		units &= ~(1 << unit);
-	}
-}
-
 static int
 nvc0_graph_create_fw(struct drm_device *dev, const char *fwname,
 		     struct nvc0_graph_fuc *fuc)
@@ -764,7 +748,6 @@
 	}
 
 	nouveau_irq_unregister(dev, 12);
-	nouveau_irq_unregister(dev, 25);
 
 	nouveau_gpuobj_ref(NULL, &priv->unk4188b8);
 	nouveau_gpuobj_ref(NULL, &priv->unk4188b4);
@@ -803,7 +786,6 @@
 
 	NVOBJ_ENGINE_ADD(dev, GR, &priv->base);
 	nouveau_irq_register(dev, 12, nvc0_graph_isr);
-	nouveau_irq_register(dev, 25, nvc0_runk140_isr);
 
 	if (nouveau_ctxfw) {
 		NV_INFO(dev, "PGRAPH: using external firmware\n");
@@ -864,6 +846,9 @@
 	case 0xce: /* 4/4/0/0, 4 */
 		priv->magic_not_rop_nr = 0x03;
 		break;
+	case 0xcf: /* 4/0/0/0, 3 */
+		priv->magic_not_rop_nr = 0x03;
+		break;
 	}
 
 	if (!priv->magic_not_rop_nr) {
@@ -889,20 +874,3 @@
 	nvc0_graph_destroy(dev, NVOBJ_ENGINE_GR);
 	return ret;
 }
-
-MODULE_FIRMWARE("nouveau/nvc0_fuc409c");
-MODULE_FIRMWARE("nouveau/nvc0_fuc409d");
-MODULE_FIRMWARE("nouveau/nvc0_fuc41ac");
-MODULE_FIRMWARE("nouveau/nvc0_fuc41ad");
-MODULE_FIRMWARE("nouveau/nvc3_fuc409c");
-MODULE_FIRMWARE("nouveau/nvc3_fuc409d");
-MODULE_FIRMWARE("nouveau/nvc3_fuc41ac");
-MODULE_FIRMWARE("nouveau/nvc3_fuc41ad");
-MODULE_FIRMWARE("nouveau/nvc4_fuc409c");
-MODULE_FIRMWARE("nouveau/nvc4_fuc409d");
-MODULE_FIRMWARE("nouveau/nvc4_fuc41ac");
-MODULE_FIRMWARE("nouveau/nvc4_fuc41ad");
-MODULE_FIRMWARE("nouveau/fuc409c");
-MODULE_FIRMWARE("nouveau/fuc409d");
-MODULE_FIRMWARE("nouveau/fuc41ac");
-MODULE_FIRMWARE("nouveau/fuc41ad");
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.h b/drivers/gpu/drm/nouveau/nvc0_graph.h
index 55689e9..636fe98 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.h
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.h
@@ -82,6 +82,7 @@
 	case 0xc3:
 	case 0xc4:
 	case 0xce: /* guess, mmio trace shows only 0x9097 state */
+	case 0xcf: /* guess, mmio trace shows only 0x9097 state */
 		return 0x9097;
 	case 0xc1:
 		return 0x9197;
diff --git a/drivers/gpu/drm/nouveau/nvc0_grctx.c b/drivers/gpu/drm/nouveau/nvc0_grctx.c
index 31018ea..dd0e6a7 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grctx.c
+++ b/drivers/gpu/drm/nouveau/nvc0_grctx.c
@@ -1678,7 +1678,10 @@
 	nv_wr32(dev, 0x419c04, 0x00000006);
 	nv_wr32(dev, 0x419c08, 0x00000002);
 	nv_wr32(dev, 0x419c20, 0x00000000);
-	nv_wr32(dev, 0x419cb0, 0x00060048); //XXX: 0xce 0x00020048
+	if (chipset == 0xce || chipset == 0xcf)
+		nv_wr32(dev, 0x419cb0, 0x00020048);
+	else
+		nv_wr32(dev, 0x419cb0, 0x00060048);
 	nv_wr32(dev, 0x419ce8, 0x00000000);
 	nv_wr32(dev, 0x419cf4, 0x00000183);
 	nv_wr32(dev, 0x419d20, chipset != 0xc1 ? 0x02180000 : 0x12180000);
@@ -1783,11 +1786,7 @@
 	nv_wr32(dev, 0x40587c, 0x00000000);
 
 	if (1) {
-		const u8 chipset_tp_max[] = { 16, 4, 0, 4, 8, 0, 0, 0,
-					      16, 0, 0, 0, 0, 0, 8, 0 };
-		u8 max = chipset_tp_max[dev_priv->chipset & 0x0f];
-		u8 tpnr[GPC_MAX];
-		u8 data[TP_MAX];
+		u8 tpnr[GPC_MAX], data[TP_MAX];
 
 		memcpy(tpnr, priv->tp_nr, sizeof(priv->tp_nr));
 		memset(data, 0x1f, sizeof(data));
@@ -1801,7 +1800,7 @@
 			data[tp] = gpc;
 		}
 
-		for (i = 0; i < max / 4; i++)
+		for (i = 0; i < 4; i++)
 			nv_wr32(dev, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc
index 0ec2add..06f5e26 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc
+++ b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc
@@ -77,6 +77,11 @@
 .b16 nvc0_gpc_mmio_tail
 .b16 nvc0_tpc_mmio_head
 .b16 nvc3_tpc_mmio_tail
+.b8  0xcf 0 0 0
+.b16 nvc0_gpc_mmio_head
+.b16 nvc0_gpc_mmio_tail
+.b16 nvc0_tpc_mmio_head
+.b16 nvcf_tpc_mmio_tail
 .b8  0 0 0 0
 
 // GPC mmio lists
@@ -134,8 +139,9 @@
 nvc0_tpc_mmio_tail:
 mmctx_data(0x000758, 1)
 mmctx_data(0x0002c4, 1)
-mmctx_data(0x0004bc, 1)
 mmctx_data(0x0006e0, 1)
+nvcf_tpc_mmio_tail:
+mmctx_data(0x0004bc, 1)
 nvc3_tpc_mmio_tail:
 mmctx_data(0x000544, 1)
 nvc1_tpc_mmio_tail:
diff --git a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h
index 1896c89..6f82032 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h
+++ b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h
@@ -25,23 +25,26 @@
 	0x00000000,
 	0x00000000,
 	0x000000c0,
-	0x011000b0,
-	0x01640114,
+	0x011c00bc,
+	0x01700120,
 	0x000000c1,
-	0x011400b0,
-	0x01780114,
+	0x012000bc,
+	0x01840120,
 	0x000000c3,
-	0x011000b0,
-	0x01740114,
+	0x011c00bc,
+	0x01800120,
 	0x000000c4,
-	0x011000b0,
-	0x01740114,
+	0x011c00bc,
+	0x01800120,
 	0x000000c8,
-	0x011000b0,
-	0x01640114,
+	0x011c00bc,
+	0x01700120,
 	0x000000ce,
-	0x011000b0,
-	0x01740114,
+	0x011c00bc,
+	0x01800120,
+	0x000000cf,
+	0x011c00bc,
+	0x017c0120,
 	0x00000000,
 	0x00000380,
 	0x14000400,
@@ -90,8 +93,8 @@
 	0x04000750,
 	0x00000758,
 	0x000002c4,
-	0x000004bc,
 	0x000006e0,
+	0x000004bc,
 	0x00000544,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
index a1a5991..e4f8c7e 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
+++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
@@ -56,6 +56,9 @@
 .b8  0xce 0 0 0
 .b16 nvc0_hub_mmio_head
 .b16 nvc0_hub_mmio_tail
+.b8  0xcf 0 0 0
+.b16 nvc0_hub_mmio_head
+.b16 nvc0_hub_mmio_tail
 .b8  0 0 0 0
 
 nvc0_hub_mmio_head:
diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h
index b3b541b..241d326 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h
+++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h
@@ -23,17 +23,19 @@
 	0x00000000,
 	0x00000000,
 	0x000000c0,
-	0x012c0090,
+	0x01340098,
 	0x000000c1,
-	0x01300090,
+	0x01380098,
 	0x000000c3,
-	0x012c0090,
+	0x01340098,
 	0x000000c4,
-	0x012c0090,
+	0x01340098,
 	0x000000c8,
-	0x012c0090,
+	0x01340098,
 	0x000000ce,
-	0x012c0090,
+	0x01340098,
+	0x000000cf,
+	0x01340098,
 	0x00000000,
 	0x0417e91c,
 	0x04400204,
@@ -190,8 +192,6 @@
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
-	0x00000000,
 };
 
 uint32_t nvc0_grhub_code[] = {
diff --git a/drivers/gpu/drm/nouveau/nvc0_pm.c b/drivers/gpu/drm/nouveau/nvc0_pm.c
new file mode 100644
index 0000000..929aded
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvc0_pm.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_bios.h"
+#include "nouveau_pm.h"
+
+static u32 read_div(struct drm_device *, int, u32, u32);
+static u32 read_pll(struct drm_device *, u32);
+
+static u32
+read_vco(struct drm_device *dev, u32 dsrc)
+{
+	u32 ssrc = nv_rd32(dev, dsrc);
+	if (!(ssrc & 0x00000100))
+		return read_pll(dev, 0x00e800);
+	return read_pll(dev, 0x00e820);
+}
+
+static u32
+read_pll(struct drm_device *dev, u32 pll)
+{
+	u32 ctrl = nv_rd32(dev, pll + 0);
+	u32 coef = nv_rd32(dev, pll + 4);
+	u32 P = (coef & 0x003f0000) >> 16;
+	u32 N = (coef & 0x0000ff00) >> 8;
+	u32 M = (coef & 0x000000ff) >> 0;
+	u32 sclk, doff;
+
+	if (!(ctrl & 0x00000001))
+		return 0;
+
+	switch (pll & 0xfff000) {
+	case 0x00e000:
+		sclk = 27000;
+		P = 1;
+		break;
+	case 0x137000:
+		doff = (pll - 0x137000) / 0x20;
+		sclk = read_div(dev, doff, 0x137120, 0x137140);
+		break;
+	case 0x132000:
+		switch (pll) {
+		case 0x132000:
+			sclk = read_pll(dev, 0x132020);
+			break;
+		case 0x132020:
+			sclk = read_div(dev, 0, 0x137320, 0x137330);
+			break;
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return 0;
+	}
+
+	return sclk * N / M / P;
+}
+
+static u32
+read_div(struct drm_device *dev, int doff, u32 dsrc, u32 dctl)
+{
+	u32 ssrc = nv_rd32(dev, dsrc + (doff * 4));
+	u32 sctl = nv_rd32(dev, dctl + (doff * 4));
+
+	switch (ssrc & 0x00000003) {
+	case 0:
+		if ((ssrc & 0x00030000) != 0x00030000)
+			return 27000;
+		return 108000;
+	case 2:
+		return 100000;
+	case 3:
+		if (sctl & 0x80000000) {
+			u32 sclk = read_vco(dev, dsrc + (doff * 4));
+			u32 sdiv = (sctl & 0x0000003f) + 2;
+			return (sclk * 2) / sdiv;
+		}
+
+		return read_vco(dev, dsrc + (doff * 4));
+	default:
+		return 0;
+	}
+}
+
+static u32
+read_mem(struct drm_device *dev)
+{
+	u32 ssel = nv_rd32(dev, 0x1373f0);
+	if (ssel & 0x00000001)
+		return read_div(dev, 0, 0x137300, 0x137310);
+	return read_pll(dev, 0x132000);
+}
+
+static u32
+read_clk(struct drm_device *dev, int clk)
+{
+	u32 sctl = nv_rd32(dev, 0x137250 + (clk * 4));
+	u32 ssel = nv_rd32(dev, 0x137100);
+	u32 sclk, sdiv;
+
+	if (ssel & (1 << clk)) {
+		if (clk < 7)
+			sclk = read_pll(dev, 0x137000 + (clk * 0x20));
+		else
+			sclk = read_pll(dev, 0x1370e0);
+		sdiv = ((sctl & 0x00003f00) >> 8) + 2;
+	} else {
+		sclk = read_div(dev, clk, 0x137160, 0x1371d0);
+		sdiv = ((sctl & 0x0000003f) >> 0) + 2;
+	}
+
+	if (sctl & 0x80000000)
+		return (sclk * 2) / sdiv;
+	return sclk;
+}
+
+int
+nvc0_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
+{
+	perflvl->shader = read_clk(dev, 0x00);
+	perflvl->core   = perflvl->shader / 2;
+	perflvl->memory = read_mem(dev);
+	perflvl->rop    = read_clk(dev, 0x01);
+	perflvl->hub07  = read_clk(dev, 0x02);
+	perflvl->hub06  = read_clk(dev, 0x07);
+	perflvl->hub01  = read_clk(dev, 0x08);
+	perflvl->copy   = read_clk(dev, 0x09);
+	perflvl->daemon = read_clk(dev, 0x0c);
+	perflvl->vdec   = read_clk(dev, 0x0e);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvc0_vram.c b/drivers/gpu/drm/nouveau/nvc0_vram.c
index e45a24d..edbfe93 100644
--- a/drivers/gpu/drm/nouveau/nvc0_vram.c
+++ b/drivers/gpu/drm/nouveau/nvc0_vram.c
@@ -61,7 +61,7 @@
 	      u32 type, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_mm *mm = dev_priv->engine.vram.mm;
+	struct nouveau_mm *mm = &dev_priv->engine.vram.mm;
 	struct nouveau_mm_node *r;
 	struct nouveau_mem *mem;
 	int ret;
@@ -106,12 +106,50 @@
 	struct nouveau_vram_engine *vram = &dev_priv->engine.vram;
 	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
 	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
-	u32 length;
+	u32 parts = nv_rd32(dev, 0x121c74);
+	u32 bsize = nv_rd32(dev, 0x10f20c);
+	u32 offset, length;
+	bool uniform = true;
+	int ret, i;
 
-	dev_priv->vram_size  = nv_rd32(dev, 0x10f20c) << 20;
-	dev_priv->vram_size *= nv_rd32(dev, 0x121c74);
+	NV_DEBUG(dev, "0x100800: 0x%08x\n", nv_rd32(dev, 0x100800));
+	NV_DEBUG(dev, "parts 0x%08x bcast_mem_amount 0x%08x\n", parts, bsize);
 
-	length = (dev_priv->vram_size >> 12) - rsvd_head - rsvd_tail;
+	/* read amount of vram attached to each memory controller */
+	for (i = 0; i < parts; i++) {
+		u32 psize = nv_rd32(dev, 0x11020c + (i * 0x1000));
+		if (psize != bsize) {
+			if (psize < bsize)
+				bsize = psize;
+			uniform = false;
+		}
 
-	return nouveau_mm_init(&vram->mm, rsvd_head, length, 1);
+		NV_DEBUG(dev, "%d: mem_amount 0x%08x\n", i, psize);
+
+		dev_priv->vram_size += (u64)psize << 20;
+	}
+
+	/* if all controllers have the same amount attached, there's no holes */
+	if (uniform) {
+		offset = rsvd_head;
+		length = (dev_priv->vram_size >> 12) - rsvd_head - rsvd_tail;
+		return nouveau_mm_init(&vram->mm, offset, length, 1);
+	}
+
+	/* otherwise, address lowest common amount from 0GiB */
+	ret = nouveau_mm_init(&vram->mm, rsvd_head, (bsize << 8) * parts, 1);
+	if (ret)
+		return ret;
+
+	/* and the rest starting from (8GiB + common_size) */
+	offset = (0x0200000000ULL >> 12) + (bsize << 8);
+	length = (dev_priv->vram_size >> 12) - (bsize << 8) - rsvd_tail;
+
+	ret = nouveau_mm_init(&vram->mm, offset, length, 0);
+	if (ret) {
+		nouveau_mm_fini(&vram->mm);
+		return ret;
+	}
+
+	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvd0_display.c b/drivers/gpu/drm/nouveau/nvd0_display.c
new file mode 100644
index 0000000..23d63b4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvd0_display.c
@@ -0,0 +1,1473 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <linux/dma-mapping.h>
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "nouveau_drv.h"
+#include "nouveau_connector.h"
+#include "nouveau_encoder.h"
+#include "nouveau_crtc.h"
+#include "nouveau_dma.h"
+#include "nouveau_fb.h"
+#include "nv50_display.h"
+
+struct nvd0_display {
+	struct nouveau_gpuobj *mem;
+	struct {
+		dma_addr_t handle;
+		u32 *ptr;
+	} evo[1];
+
+	struct tasklet_struct tasklet;
+	u32 modeset;
+};
+
+static struct nvd0_display *
+nvd0_display(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	return dev_priv->engine.display.priv;
+}
+
+static inline int
+evo_icmd(struct drm_device *dev, int id, u32 mthd, u32 data)
+{
+	int ret = 0;
+	nv_mask(dev, 0x610700 + (id * 0x10), 0x00000001, 0x00000001);
+	nv_wr32(dev, 0x610704 + (id * 0x10), data);
+	nv_mask(dev, 0x610704 + (id * 0x10), 0x80000ffc, 0x80000000 | mthd);
+	if (!nv_wait(dev, 0x610704 + (id * 0x10), 0x80000000, 0x00000000))
+		ret = -EBUSY;
+	nv_mask(dev, 0x610700 + (id * 0x10), 0x00000001, 0x00000000);
+	return ret;
+}
+
+static u32 *
+evo_wait(struct drm_device *dev, int id, int nr)
+{
+	struct nvd0_display *disp = nvd0_display(dev);
+	u32 put = nv_rd32(dev, 0x640000 + (id * 0x1000)) / 4;
+
+	if (put + nr >= (PAGE_SIZE / 4)) {
+		disp->evo[id].ptr[put] = 0x20000000;
+
+		nv_wr32(dev, 0x640000 + (id * 0x1000), 0x00000000);
+		if (!nv_wait(dev, 0x640004 + (id * 0x1000), ~0, 0x00000000)) {
+			NV_ERROR(dev, "evo %d dma stalled\n", id);
+			return NULL;
+		}
+
+		put = 0;
+	}
+
+	return disp->evo[id].ptr + put;
+}
+
+static void
+evo_kick(u32 *push, struct drm_device *dev, int id)
+{
+	struct nvd0_display *disp = nvd0_display(dev);
+	nv_wr32(dev, 0x640000 + (id * 0x1000), (push - disp->evo[id].ptr) << 2);
+}
+
+#define evo_mthd(p,m,s) *((p)++) = (((s) << 18) | (m))
+#define evo_data(p,d)   *((p)++) = (d)
+
+static struct drm_crtc *
+nvd0_display_crtc_get(struct drm_encoder *encoder)
+{
+	return nouveau_encoder(encoder)->crtc;
+}
+
+/******************************************************************************
+ * CRTC
+ *****************************************************************************/
+static int
+nvd0_crtc_set_dither(struct nouveau_crtc *nv_crtc, bool on, bool update)
+{
+	struct drm_device *dev = nv_crtc->base.dev;
+	u32 *push, mode;
+
+	mode = 0x00000000;
+	if (on) {
+		/* 0x11: 6bpc dynamic 2x2
+		 * 0x13: 8bpc dynamic 2x2
+		 * 0x19: 6bpc static 2x2
+		 * 0x1b: 8bpc static 2x2
+		 * 0x21: 6bpc temporal
+		 * 0x23: 8bpc temporal
+		 */
+		mode = 0x00000011;
+	}
+
+	push = evo_wait(dev, 0, 4);
+	if (push) {
+		evo_mthd(push, 0x0490 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, mode);
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, dev, 0);
+	}
+
+	return 0;
+}
+
+static int
+nvd0_crtc_set_scale(struct nouveau_crtc *nv_crtc, int type, bool update)
+{
+	struct drm_display_mode *mode = &nv_crtc->base.mode;
+	struct drm_device *dev = nv_crtc->base.dev;
+	struct nouveau_connector *nv_connector;
+	u32 *push, outX, outY;
+
+	outX = mode->hdisplay;
+	outY = mode->vdisplay;
+
+	nv_connector = nouveau_crtc_connector_get(nv_crtc);
+	if (nv_connector && nv_connector->native_mode) {
+		struct drm_display_mode *native = nv_connector->native_mode;
+		u32 xratio = (native->hdisplay << 19) / mode->hdisplay;
+		u32 yratio = (native->vdisplay << 19) / mode->vdisplay;
+
+		switch (type) {
+		case DRM_MODE_SCALE_ASPECT:
+			if (xratio > yratio) {
+				outX = (mode->hdisplay * yratio) >> 19;
+				outY = (mode->vdisplay * yratio) >> 19;
+			} else {
+				outX = (mode->hdisplay * xratio) >> 19;
+				outY = (mode->vdisplay * xratio) >> 19;
+			}
+			break;
+		case DRM_MODE_SCALE_FULLSCREEN:
+			outX = native->hdisplay;
+			outY = native->vdisplay;
+			break;
+		default:
+			break;
+		}
+	}
+
+	push = evo_wait(dev, 0, 16);
+	if (push) {
+		evo_mthd(push, 0x04c0 + (nv_crtc->index * 0x300), 3);
+		evo_data(push, (outY << 16) | outX);
+		evo_data(push, (outY << 16) | outX);
+		evo_data(push, (outY << 16) | outX);
+		evo_mthd(push, 0x0494 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x04b8 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, (mode->vdisplay << 16) | mode->hdisplay);
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, dev, 0);
+	}
+
+	return 0;
+}
+
+static int
+nvd0_crtc_set_image(struct nouveau_crtc *nv_crtc, struct drm_framebuffer *fb,
+		    int x, int y, bool update)
+{
+	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(fb);
+	u32 *push;
+
+	push = evo_wait(fb->dev, 0, 16);
+	if (push) {
+		evo_mthd(push, 0x0460 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, nvfb->nvbo->bo.offset >> 8);
+		evo_mthd(push, 0x0468 + (nv_crtc->index * 0x300), 4);
+		evo_data(push, (fb->height << 16) | fb->width);
+		evo_data(push, nvfb->r_pitch);
+		evo_data(push, nvfb->r_format);
+		evo_data(push, nvfb->r_dma);
+		evo_mthd(push, 0x04b0 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, (y << 16) | x);
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, fb->dev, 0);
+	}
+
+	nv_crtc->fb.tile_flags = nvfb->r_dma;
+	return 0;
+}
+
+static void
+nvd0_crtc_cursor_show(struct nouveau_crtc *nv_crtc, bool show, bool update)
+{
+	struct drm_device *dev = nv_crtc->base.dev;
+	u32 *push = evo_wait(dev, 0, 16);
+	if (push) {
+		if (show) {
+			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 2);
+			evo_data(push, 0x85000000);
+			evo_data(push, nv_crtc->cursor.nvbo->bo.offset >> 8);
+			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
+			evo_data(push, NvEvoVRAM);
+		} else {
+			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x05000000);
+			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x00000000);
+		}
+
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+
+		evo_kick(push, dev, 0);
+	}
+}
+
+static void
+nvd0_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+}
+
+static void
+nvd0_crtc_prepare(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	u32 *push;
+
+	push = evo_wait(crtc->dev, 0, 2);
+	if (push) {
+		evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x03000000);
+		evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, crtc->dev, 0);
+	}
+
+	nvd0_crtc_cursor_show(nv_crtc, false, false);
+}
+
+static void
+nvd0_crtc_commit(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	u32 *push;
+
+	push = evo_wait(crtc->dev, 0, 32);
+	if (push) {
+		evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, nv_crtc->fb.tile_flags);
+		evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 4);
+		evo_data(push, 0x83000000);
+		evo_data(push, nv_crtc->lut.nvbo->bo.offset >> 8);
+		evo_data(push, 0x00000000);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
+		evo_data(push, NvEvoVRAM);
+		evo_mthd(push, 0x0430 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0xffffff00);
+		evo_kick(push, crtc->dev, 0);
+	}
+
+	nvd0_crtc_cursor_show(nv_crtc, nv_crtc->cursor.visible, true);
+}
+
+static bool
+nvd0_crtc_mode_fixup(struct drm_crtc *crtc, struct drm_display_mode *mode,
+		     struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static int
+nvd0_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
+{
+	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb);
+	int ret;
+
+	ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM);
+	if (ret)
+		return ret;
+
+	if (old_fb) {
+		nvfb = nouveau_framebuffer(old_fb);
+		nouveau_bo_unpin(nvfb->nvbo);
+	}
+
+	return 0;
+}
+
+static int
+nvd0_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
+		   struct drm_display_mode *mode, int x, int y,
+		   struct drm_framebuffer *old_fb)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nouveau_connector *nv_connector;
+	u32 htotal = mode->htotal;
+	u32 vtotal = mode->vtotal;
+	u32 hsyncw = mode->hsync_end - mode->hsync_start - 1;
+	u32 vsyncw = mode->vsync_end - mode->vsync_start - 1;
+	u32 hfrntp = mode->hsync_start - mode->hdisplay;
+	u32 vfrntp = mode->vsync_start - mode->vdisplay;
+	u32 hbackp = mode->htotal - mode->hsync_end;
+	u32 vbackp = mode->vtotal - mode->vsync_end;
+	u32 hss2be = hsyncw + hbackp;
+	u32 vss2be = vsyncw + vbackp;
+	u32 hss2de = htotal - hfrntp;
+	u32 vss2de = vtotal - vfrntp;
+	u32 syncs, *push;
+	int ret;
+
+	syncs = 0x00000001;
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		syncs |= 0x00000008;
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		syncs |= 0x00000010;
+
+	ret = nvd0_crtc_swap_fbs(crtc, old_fb);
+	if (ret)
+		return ret;
+
+	push = evo_wait(crtc->dev, 0, 64);
+	if (push) {
+		evo_mthd(push, 0x0410 + (nv_crtc->index * 0x300), 5);
+		evo_data(push, 0x00000000);
+		evo_data(push, (vtotal << 16) | htotal);
+		evo_data(push, (vsyncw << 16) | hsyncw);
+		evo_data(push, (vss2be << 16) | hss2be);
+		evo_data(push, (vss2de << 16) | hss2de);
+		evo_mthd(push, 0x042c + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x00000000); /* ??? */
+		evo_mthd(push, 0x0450 + (nv_crtc->index * 0x300), 3);
+		evo_data(push, mode->clock * 1000);
+		evo_data(push, 0x00200000); /* ??? */
+		evo_data(push, mode->clock * 1000);
+		evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, syncs);
+		evo_kick(push, crtc->dev, 0);
+	}
+
+	nv_connector = nouveau_crtc_connector_get(nv_crtc);
+	nvd0_crtc_set_dither(nv_crtc, nv_connector->use_dithering, false);
+	nvd0_crtc_set_scale(nv_crtc, nv_connector->scaling_mode, false);
+	nvd0_crtc_set_image(nv_crtc, crtc->fb, x, y, false);
+	return 0;
+}
+
+static int
+nvd0_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+			struct drm_framebuffer *old_fb)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	int ret;
+
+	if (!crtc->fb) {
+		NV_DEBUG_KMS(crtc->dev, "No FB bound\n");
+		return 0;
+	}
+
+	ret = nvd0_crtc_swap_fbs(crtc, old_fb);
+	if (ret)
+		return ret;
+
+	nvd0_crtc_set_image(nv_crtc, crtc->fb, x, y, true);
+	return 0;
+}
+
+static int
+nvd0_crtc_mode_set_base_atomic(struct drm_crtc *crtc,
+			       struct drm_framebuffer *fb, int x, int y,
+			       enum mode_set_atomic state)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	nvd0_crtc_set_image(nv_crtc, fb, x, y, true);
+	return 0;
+}
+
+static void
+nvd0_crtc_lut_load(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	void __iomem *lut = nvbo_kmap_obj_iovirtual(nv_crtc->lut.nvbo);
+	int i;
+
+	for (i = 0; i < 256; i++) {
+		writew(0x6000 + (nv_crtc->lut.r[i] >> 2), lut + (i * 0x20) + 0);
+		writew(0x6000 + (nv_crtc->lut.g[i] >> 2), lut + (i * 0x20) + 2);
+		writew(0x6000 + (nv_crtc->lut.b[i] >> 2), lut + (i * 0x20) + 4);
+	}
+}
+
+static int
+nvd0_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
+		     uint32_t handle, uint32_t width, uint32_t height)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct drm_gem_object *gem;
+	struct nouveau_bo *nvbo;
+	bool visible = (handle != 0);
+	int i, ret = 0;
+
+	if (visible) {
+		if (width != 64 || height != 64)
+			return -EINVAL;
+
+		gem = drm_gem_object_lookup(dev, file_priv, handle);
+		if (unlikely(!gem))
+			return -ENOENT;
+		nvbo = nouveau_gem_object(gem);
+
+		ret = nouveau_bo_map(nvbo);
+		if (ret == 0) {
+			for (i = 0; i < 64 * 64; i++) {
+				u32 v = nouveau_bo_rd32(nvbo, i);
+				nouveau_bo_wr32(nv_crtc->cursor.nvbo, i, v);
+			}
+			nouveau_bo_unmap(nvbo);
+		}
+
+		drm_gem_object_unreference_unlocked(gem);
+	}
+
+	if (visible != nv_crtc->cursor.visible) {
+		nvd0_crtc_cursor_show(nv_crtc, visible, true);
+		nv_crtc->cursor.visible = visible;
+	}
+
+	return ret;
+}
+
+static int
+nvd0_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	const u32 data = (y << 16) | x;
+
+	nv_wr32(crtc->dev, 0x64d084 + (nv_crtc->index * 0x1000), data);
+	nv_wr32(crtc->dev, 0x64d080 + (nv_crtc->index * 0x1000), 0x00000000);
+	return 0;
+}
+
+static void
+nvd0_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
+		    uint32_t start, uint32_t size)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	u32 end = max(start + size, (u32)256);
+	u32 i;
+
+	for (i = start; i < end; i++) {
+		nv_crtc->lut.r[i] = r[i];
+		nv_crtc->lut.g[i] = g[i];
+		nv_crtc->lut.b[i] = b[i];
+	}
+
+	nvd0_crtc_lut_load(crtc);
+}
+
+static void
+nvd0_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
+	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+	nouveau_bo_unmap(nv_crtc->lut.nvbo);
+	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
+	drm_crtc_cleanup(crtc);
+	kfree(crtc);
+}
+
+static const struct drm_crtc_helper_funcs nvd0_crtc_hfunc = {
+	.dpms = nvd0_crtc_dpms,
+	.prepare = nvd0_crtc_prepare,
+	.commit = nvd0_crtc_commit,
+	.mode_fixup = nvd0_crtc_mode_fixup,
+	.mode_set = nvd0_crtc_mode_set,
+	.mode_set_base = nvd0_crtc_mode_set_base,
+	.mode_set_base_atomic = nvd0_crtc_mode_set_base_atomic,
+	.load_lut = nvd0_crtc_lut_load,
+};
+
+static const struct drm_crtc_funcs nvd0_crtc_func = {
+	.cursor_set = nvd0_crtc_cursor_set,
+	.cursor_move = nvd0_crtc_cursor_move,
+	.gamma_set = nvd0_crtc_gamma_set,
+	.set_config = drm_crtc_helper_set_config,
+	.destroy = nvd0_crtc_destroy,
+};
+
+static void
+nvd0_cursor_set_pos(struct nouveau_crtc *nv_crtc, int x, int y)
+{
+}
+
+static void
+nvd0_cursor_set_offset(struct nouveau_crtc *nv_crtc, uint32_t offset)
+{
+}
+
+static int
+nvd0_crtc_create(struct drm_device *dev, int index)
+{
+	struct nouveau_crtc *nv_crtc;
+	struct drm_crtc *crtc;
+	int ret, i;
+
+	nv_crtc = kzalloc(sizeof(*nv_crtc), GFP_KERNEL);
+	if (!nv_crtc)
+		return -ENOMEM;
+
+	nv_crtc->index = index;
+	nv_crtc->set_dither = nvd0_crtc_set_dither;
+	nv_crtc->set_scale = nvd0_crtc_set_scale;
+	nv_crtc->cursor.set_offset = nvd0_cursor_set_offset;
+	nv_crtc->cursor.set_pos = nvd0_cursor_set_pos;
+	for (i = 0; i < 256; i++) {
+		nv_crtc->lut.r[i] = i << 8;
+		nv_crtc->lut.g[i] = i << 8;
+		nv_crtc->lut.b[i] = i << 8;
+	}
+
+	crtc = &nv_crtc->base;
+	drm_crtc_init(dev, crtc, &nvd0_crtc_func);
+	drm_crtc_helper_add(crtc, &nvd0_crtc_hfunc);
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+
+	ret = nouveau_bo_new(dev, 64 * 64 * 4, 0x100, TTM_PL_FLAG_VRAM,
+			     0, 0x0000, &nv_crtc->cursor.nvbo);
+	if (!ret) {
+		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM);
+		if (!ret)
+			ret = nouveau_bo_map(nv_crtc->cursor.nvbo);
+		if (ret)
+			nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+	}
+
+	if (ret)
+		goto out;
+
+	ret = nouveau_bo_new(dev, 8192, 0x100, TTM_PL_FLAG_VRAM,
+			     0, 0x0000, &nv_crtc->lut.nvbo);
+	if (!ret) {
+		ret = nouveau_bo_pin(nv_crtc->lut.nvbo, TTM_PL_FLAG_VRAM);
+		if (!ret)
+			ret = nouveau_bo_map(nv_crtc->lut.nvbo);
+		if (ret)
+			nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
+	}
+
+	if (ret)
+		goto out;
+
+	nvd0_crtc_lut_load(crtc);
+
+out:
+	if (ret)
+		nvd0_crtc_destroy(crtc);
+	return ret;
+}
+
+/******************************************************************************
+ * DAC
+ *****************************************************************************/
+static void
+nvd0_dac_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	int or = nv_encoder->or;
+	u32 dpms_ctrl;
+
+	dpms_ctrl = 0x80000000;
+	if (mode == DRM_MODE_DPMS_STANDBY || mode == DRM_MODE_DPMS_OFF)
+		dpms_ctrl |= 0x00000001;
+	if (mode == DRM_MODE_DPMS_SUSPEND || mode == DRM_MODE_DPMS_OFF)
+		dpms_ctrl |= 0x00000004;
+
+	nv_wait(dev, 0x61a004 + (or * 0x0800), 0x80000000, 0x00000000);
+	nv_mask(dev, 0x61a004 + (or * 0x0800), 0xc000007f, dpms_ctrl);
+	nv_wait(dev, 0x61a004 + (or * 0x0800), 0x80000000, 0x00000000);
+}
+
+static bool
+nvd0_dac_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode,
+		    struct drm_display_mode *adjusted_mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_connector *nv_connector;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (nv_connector && nv_connector->native_mode) {
+		if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) {
+			int id = adjusted_mode->base.id;
+			*adjusted_mode = *nv_connector->native_mode;
+			adjusted_mode->base.id = id;
+		}
+	}
+
+	return true;
+}
+
+static void
+nvd0_dac_prepare(struct drm_encoder *encoder)
+{
+}
+
+static void
+nvd0_dac_commit(struct drm_encoder *encoder)
+{
+}
+
+static void
+nvd0_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
+		  struct drm_display_mode *adjusted_mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	u32 *push;
+
+	nvd0_dac_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	push = evo_wait(encoder->dev, 0, 4);
+	if (push) {
+		evo_mthd(push, 0x0180 + (nv_encoder->or * 0x20), 2);
+		evo_data(push, 1 << nv_crtc->index);
+		evo_data(push, 0x00ff);
+		evo_kick(push, encoder->dev, 0);
+	}
+
+	nv_encoder->crtc = encoder->crtc;
+}
+
+static void
+nvd0_dac_disconnect(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	u32 *push;
+
+	if (nv_encoder->crtc) {
+		nvd0_crtc_prepare(nv_encoder->crtc);
+
+		push = evo_wait(dev, 0, 4);
+		if (push) {
+			evo_mthd(push, 0x0180 + (nv_encoder->or * 0x20), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+			evo_kick(push, dev, 0);
+		}
+
+		nv_encoder->crtc = NULL;
+	}
+}
+
+static enum drm_connector_status
+nvd0_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+{
+	enum drm_connector_status status = connector_status_disconnected;
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	int or = nv_encoder->or;
+	u32 load;
+
+	nv_wr32(dev, 0x61a00c + (or * 0x800), 0x00100000);
+	udelay(9500);
+	nv_wr32(dev, 0x61a00c + (or * 0x800), 0x80000000);
+
+	load = nv_rd32(dev, 0x61a00c + (or * 0x800));
+	if ((load & 0x38000000) == 0x38000000)
+		status = connector_status_connected;
+
+	nv_wr32(dev, 0x61a00c + (or * 0x800), 0x00000000);
+	return status;
+}
+
+static void
+nvd0_dac_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static const struct drm_encoder_helper_funcs nvd0_dac_hfunc = {
+	.dpms = nvd0_dac_dpms,
+	.mode_fixup = nvd0_dac_mode_fixup,
+	.prepare = nvd0_dac_prepare,
+	.commit = nvd0_dac_commit,
+	.mode_set = nvd0_dac_mode_set,
+	.disable = nvd0_dac_disconnect,
+	.get_crtc = nvd0_display_crtc_get,
+	.detect = nvd0_dac_detect
+};
+
+static const struct drm_encoder_funcs nvd0_dac_func = {
+	.destroy = nvd0_dac_destroy,
+};
+
+static int
+nvd0_dac_create(struct drm_connector *connector, struct dcb_entry *dcbe)
+{
+	struct drm_device *dev = connector->dev;
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+
+	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
+	if (!nv_encoder)
+		return -ENOMEM;
+	nv_encoder->dcb = dcbe;
+	nv_encoder->or = ffs(dcbe->or) - 1;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(dev, encoder, &nvd0_dac_func, DRM_MODE_ENCODER_DAC);
+	drm_encoder_helper_add(encoder, &nvd0_dac_hfunc);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+	return 0;
+}
+
+/******************************************************************************
+ * SOR
+ *****************************************************************************/
+static void
+nvd0_sor_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	struct drm_encoder *partner;
+	int or = nv_encoder->or;
+	u32 dpms_ctrl;
+
+	nv_encoder->last_dpms = mode;
+
+	list_for_each_entry(partner, &dev->mode_config.encoder_list, head) {
+		struct nouveau_encoder *nv_partner = nouveau_encoder(partner);
+
+		if (partner->encoder_type != DRM_MODE_ENCODER_TMDS)
+			continue;
+
+		if (nv_partner != nv_encoder &&
+		    nv_partner->dcb->or == nv_encoder->or) {
+			if (nv_partner->last_dpms == DRM_MODE_DPMS_ON)
+				return;
+			break;
+		}
+	}
+
+	dpms_ctrl  = (mode == DRM_MODE_DPMS_ON);
+	dpms_ctrl |= 0x80000000;
+
+	nv_wait(dev, 0x61c004 + (or * 0x0800), 0x80000000, 0x00000000);
+	nv_mask(dev, 0x61c004 + (or * 0x0800), 0x80000001, dpms_ctrl);
+	nv_wait(dev, 0x61c004 + (or * 0x0800), 0x80000000, 0x00000000);
+	nv_wait(dev, 0x61c030 + (or * 0x0800), 0x10000000, 0x00000000);
+}
+
+static bool
+nvd0_sor_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode,
+		    struct drm_display_mode *adjusted_mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_connector *nv_connector;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (nv_connector && nv_connector->native_mode) {
+		if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) {
+			int id = adjusted_mode->base.id;
+			*adjusted_mode = *nv_connector->native_mode;
+			adjusted_mode->base.id = id;
+		}
+	}
+
+	return true;
+}
+
+static void
+nvd0_sor_prepare(struct drm_encoder *encoder)
+{
+}
+
+static void
+nvd0_sor_commit(struct drm_encoder *encoder)
+{
+}
+
+static void
+nvd0_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
+		  struct drm_display_mode *mode)
+{
+	struct drm_nouveau_private *dev_priv = encoder->dev->dev_private;
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector;
+	struct nvbios *bios = &dev_priv->vbios;
+	u32 mode_ctrl = (1 << nv_crtc->index);
+	u32 *push, or_config;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	switch (nv_encoder->dcb->type) {
+	case OUTPUT_TMDS:
+		if (nv_encoder->dcb->sorconf.link & 1) {
+			if (mode->clock < 165000)
+				mode_ctrl |= 0x00000100;
+			else
+				mode_ctrl |= 0x00000500;
+		} else {
+			mode_ctrl |= 0x00000200;
+		}
+
+		or_config = (mode_ctrl & 0x00000f00) >> 8;
+		if (mode->clock >= 165000)
+			or_config |= 0x0100;
+		break;
+	case OUTPUT_LVDS:
+		or_config = (mode_ctrl & 0x00000f00) >> 8;
+		if (bios->fp_no_ddc) {
+			if (bios->fp.dual_link)
+				or_config |= 0x0100;
+			if (bios->fp.if_is_24bit)
+				or_config |= 0x0200;
+		} else {
+			if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS_SPWG) {
+				if (((u8 *)nv_connector->edid)[121] == 2)
+					or_config |= 0x0100;
+			} else
+			if (mode->clock >= bios->fp.duallink_transition_clk) {
+				or_config |= 0x0100;
+			}
+
+			if (or_config & 0x0100) {
+				if (bios->fp.strapless_is_24bit & 2)
+					or_config |= 0x0200;
+			} else {
+				if (bios->fp.strapless_is_24bit & 1)
+					or_config |= 0x0200;
+			}
+
+			if (nv_connector->base.display_info.bpc == 8)
+				or_config |= 0x0200;
+
+		}
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+	nvd0_sor_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	push = evo_wait(encoder->dev, 0, 4);
+	if (push) {
+		evo_mthd(push, 0x0200 + (nv_encoder->or * 0x20), 2);
+		evo_data(push, mode_ctrl);
+		evo_data(push, or_config);
+		evo_kick(push, encoder->dev, 0);
+	}
+
+	nv_encoder->crtc = encoder->crtc;
+}
+
+static void
+nvd0_sor_disconnect(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	u32 *push;
+
+	if (nv_encoder->crtc) {
+		nvd0_crtc_prepare(nv_encoder->crtc);
+
+		push = evo_wait(dev, 0, 4);
+		if (push) {
+			evo_mthd(push, 0x0200 + (nv_encoder->or * 0x20), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+			evo_kick(push, dev, 0);
+		}
+
+		nv_encoder->crtc = NULL;
+		nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
+	}
+}
+
+static void
+nvd0_sor_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static const struct drm_encoder_helper_funcs nvd0_sor_hfunc = {
+	.dpms = nvd0_sor_dpms,
+	.mode_fixup = nvd0_sor_mode_fixup,
+	.prepare = nvd0_sor_prepare,
+	.commit = nvd0_sor_commit,
+	.mode_set = nvd0_sor_mode_set,
+	.disable = nvd0_sor_disconnect,
+	.get_crtc = nvd0_display_crtc_get,
+};
+
+static const struct drm_encoder_funcs nvd0_sor_func = {
+	.destroy = nvd0_sor_destroy,
+};
+
+static int
+nvd0_sor_create(struct drm_connector *connector, struct dcb_entry *dcbe)
+{
+	struct drm_device *dev = connector->dev;
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+
+	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
+	if (!nv_encoder)
+		return -ENOMEM;
+	nv_encoder->dcb = dcbe;
+	nv_encoder->or = ffs(dcbe->or) - 1;
+	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(dev, encoder, &nvd0_sor_func, DRM_MODE_ENCODER_TMDS);
+	drm_encoder_helper_add(encoder, &nvd0_sor_hfunc);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+	return 0;
+}
+
+/******************************************************************************
+ * IRQ
+ *****************************************************************************/
+static struct dcb_entry *
+lookup_dcb(struct drm_device *dev, int id, u32 mc)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	int type, or, i;
+
+	if (id < 4) {
+		type = OUTPUT_ANALOG;
+		or   = id;
+	} else {
+		switch (mc & 0x00000f00) {
+		case 0x00000000: type = OUTPUT_LVDS; break;
+		case 0x00000100: type = OUTPUT_TMDS; break;
+		case 0x00000200: type = OUTPUT_TMDS; break;
+		case 0x00000500: type = OUTPUT_TMDS; break;
+		default:
+			NV_ERROR(dev, "PDISP: unknown SOR mc 0x%08x\n", mc);
+			return NULL;
+		}
+
+		or = id - 4;
+	}
+
+	for (i = 0; i < dev_priv->vbios.dcb.entries; i++) {
+		struct dcb_entry *dcb = &dev_priv->vbios.dcb.entry[i];
+		if (dcb->type == type && (dcb->or & (1 << or)))
+			return dcb;
+	}
+
+	NV_ERROR(dev, "PDISP: DCB for %d/0x%08x not found\n", id, mc);
+	return NULL;
+}
+
+static void
+nvd0_display_unk1_handler(struct drm_device *dev, u32 crtc, u32 mask)
+{
+	struct dcb_entry *dcb;
+	int i;
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcc = nv_rd32(dev, 0x640180 + (i * 0x20));
+		if (!(mcc & (1 << crtc)))
+			continue;
+
+		dcb = lookup_dcb(dev, i, mcc);
+		if (!dcb)
+			continue;
+
+		nouveau_bios_run_display_table(dev, 0x0000, -1, dcb, crtc);
+	}
+
+	nv_wr32(dev, 0x6101d4, 0x00000000);
+	nv_wr32(dev, 0x6109d4, 0x00000000);
+	nv_wr32(dev, 0x6101d0, 0x80000000);
+}
+
+static void
+nvd0_display_unk2_handler(struct drm_device *dev, u32 crtc, u32 mask)
+{
+	struct dcb_entry *dcb;
+	u32 or, tmp, pclk;
+	int i;
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcc = nv_rd32(dev, 0x640180 + (i * 0x20));
+		if (!(mcc & (1 << crtc)))
+			continue;
+
+		dcb = lookup_dcb(dev, i, mcc);
+		if (!dcb)
+			continue;
+
+		nouveau_bios_run_display_table(dev, 0x0000, -2, dcb, crtc);
+	}
+
+	pclk = nv_rd32(dev, 0x660450 + (crtc * 0x300)) / 1000;
+	if (mask & 0x00010000) {
+		nv50_crtc_set_clock(dev, crtc, pclk);
+	}
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcp = nv_rd32(dev, 0x660180 + (i * 0x20));
+		u32 cfg = nv_rd32(dev, 0x660184 + (i * 0x20));
+		if (!(mcp & (1 << crtc)))
+			continue;
+
+		dcb = lookup_dcb(dev, i, mcp);
+		if (!dcb)
+			continue;
+		or = ffs(dcb->or) - 1;
+
+		nouveau_bios_run_display_table(dev, cfg, pclk, dcb, crtc);
+
+		nv_wr32(dev, 0x612200 + (crtc * 0x800), 0x00000000);
+		switch (dcb->type) {
+		case OUTPUT_ANALOG:
+			nv_wr32(dev, 0x612280 + (or * 0x800), 0x00000000);
+			break;
+		case OUTPUT_TMDS:
+		case OUTPUT_LVDS:
+			if (cfg & 0x00000100)
+				tmp = 0x00000101;
+			else
+				tmp = 0x00000000;
+
+			nv_mask(dev, 0x612300 + (or * 0x800), 0x00000707, tmp);
+			break;
+		default:
+			break;
+		}
+
+		break;
+	}
+
+	nv_wr32(dev, 0x6101d4, 0x00000000);
+	nv_wr32(dev, 0x6109d4, 0x00000000);
+	nv_wr32(dev, 0x6101d0, 0x80000000);
+}
+
+static void
+nvd0_display_unk4_handler(struct drm_device *dev, u32 crtc, u32 mask)
+{
+	struct dcb_entry *dcb;
+	int pclk, i;
+
+	pclk = nv_rd32(dev, 0x660450 + (crtc * 0x300)) / 1000;
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcp = nv_rd32(dev, 0x660180 + (i * 0x20));
+		u32 cfg = nv_rd32(dev, 0x660184 + (i * 0x20));
+		if (!(mcp & (1 << crtc)))
+			continue;
+
+		dcb = lookup_dcb(dev, i, mcp);
+		if (!dcb)
+			continue;
+
+		nouveau_bios_run_display_table(dev, cfg, -pclk, dcb, crtc);
+	}
+
+	nv_wr32(dev, 0x6101d4, 0x00000000);
+	nv_wr32(dev, 0x6109d4, 0x00000000);
+	nv_wr32(dev, 0x6101d0, 0x80000000);
+}
+
+static void
+nvd0_display_bh(unsigned long data)
+{
+	struct drm_device *dev = (struct drm_device *)data;
+	struct nvd0_display *disp = nvd0_display(dev);
+	u32 mask, crtc;
+	int i;
+
+	if (drm_debug & (DRM_UT_DRIVER | DRM_UT_KMS)) {
+		NV_INFO(dev, "PDISP: modeset req %d\n", disp->modeset);
+		NV_INFO(dev, " STAT: 0x%08x 0x%08x 0x%08x\n",
+			 nv_rd32(dev, 0x6101d0),
+			 nv_rd32(dev, 0x6101d4), nv_rd32(dev, 0x6109d4));
+		for (i = 0; i < 8; i++) {
+			NV_INFO(dev, " %s%d: 0x%08x 0x%08x\n",
+				i < 4 ? "DAC" : "SOR", i,
+				nv_rd32(dev, 0x640180 + (i * 0x20)),
+				nv_rd32(dev, 0x660180 + (i * 0x20)));
+		}
+	}
+
+	mask = nv_rd32(dev, 0x6101d4);
+	crtc = 0;
+	if (!mask) {
+		mask = nv_rd32(dev, 0x6109d4);
+		crtc = 1;
+	}
+
+	if (disp->modeset & 0x00000001)
+		nvd0_display_unk1_handler(dev, crtc, mask);
+	if (disp->modeset & 0x00000002)
+		nvd0_display_unk2_handler(dev, crtc, mask);
+	if (disp->modeset & 0x00000004)
+		nvd0_display_unk4_handler(dev, crtc, mask);
+}
+
+static void
+nvd0_display_intr(struct drm_device *dev)
+{
+	struct nvd0_display *disp = nvd0_display(dev);
+	u32 intr = nv_rd32(dev, 0x610088);
+
+	if (intr & 0x00000002) {
+		u32 stat = nv_rd32(dev, 0x61009c);
+		int chid = ffs(stat) - 1;
+		if (chid >= 0) {
+			u32 mthd = nv_rd32(dev, 0x6101f0 + (chid * 12));
+			u32 data = nv_rd32(dev, 0x6101f4 + (chid * 12));
+			u32 unkn = nv_rd32(dev, 0x6101f8 + (chid * 12));
+
+			NV_INFO(dev, "EvoCh: chid %d mthd 0x%04x data 0x%08x "
+				     "0x%08x 0x%08x\n",
+				chid, (mthd & 0x0000ffc), data, mthd, unkn);
+			nv_wr32(dev, 0x61009c, (1 << chid));
+			nv_wr32(dev, 0x6101f0 + (chid * 12), 0x90000000);
+		}
+
+		intr &= ~0x00000002;
+	}
+
+	if (intr & 0x00100000) {
+		u32 stat = nv_rd32(dev, 0x6100ac);
+
+		if (stat & 0x00000007) {
+			disp->modeset = stat;
+			tasklet_schedule(&disp->tasklet);
+
+			nv_wr32(dev, 0x6100ac, (stat & 0x00000007));
+			stat &= ~0x00000007;
+		}
+
+		if (stat) {
+			NV_INFO(dev, "PDISP: unknown intr24 0x%08x\n", stat);
+			nv_wr32(dev, 0x6100ac, stat);
+		}
+
+		intr &= ~0x00100000;
+	}
+
+	if (intr & 0x01000000) {
+		u32 stat = nv_rd32(dev, 0x6100bc);
+		nv_wr32(dev, 0x6100bc, stat);
+		intr &= ~0x01000000;
+	}
+
+	if (intr & 0x02000000) {
+		u32 stat = nv_rd32(dev, 0x6108bc);
+		nv_wr32(dev, 0x6108bc, stat);
+		intr &= ~0x02000000;
+	}
+
+	if (intr)
+		NV_INFO(dev, "PDISP: unknown intr 0x%08x\n", intr);
+}
+
+/******************************************************************************
+ * Init
+ *****************************************************************************/
+static void
+nvd0_display_fini(struct drm_device *dev)
+{
+	int i;
+
+	/* fini cursors */
+	for (i = 14; i >= 13; i--) {
+		if (!(nv_rd32(dev, 0x610490 + (i * 0x10)) & 0x00000001))
+			continue;
+
+		nv_mask(dev, 0x610490 + (i * 0x10), 0x00000001, 0x00000000);
+		nv_wait(dev, 0x610490 + (i * 0x10), 0x00010000, 0x00000000);
+		nv_mask(dev, 0x610090, 1 << i, 0x00000000);
+		nv_mask(dev, 0x6100a0, 1 << i, 0x00000000);
+	}
+
+	/* fini master */
+	if (nv_rd32(dev, 0x610490) & 0x00000010) {
+		nv_mask(dev, 0x610490, 0x00000010, 0x00000000);
+		nv_mask(dev, 0x610490, 0x00000003, 0x00000000);
+		nv_wait(dev, 0x610490, 0x80000000, 0x00000000);
+		nv_mask(dev, 0x610090, 0x00000001, 0x00000000);
+		nv_mask(dev, 0x6100a0, 0x00000001, 0x00000000);
+	}
+}
+
+int
+nvd0_display_init(struct drm_device *dev)
+{
+	struct nvd0_display *disp = nvd0_display(dev);
+	u32 *push;
+	int i;
+
+	if (nv_rd32(dev, 0x6100ac) & 0x00000100) {
+		nv_wr32(dev, 0x6100ac, 0x00000100);
+		nv_mask(dev, 0x6194e8, 0x00000001, 0x00000000);
+		if (!nv_wait(dev, 0x6194e8, 0x00000002, 0x00000000)) {
+			NV_ERROR(dev, "PDISP: 0x6194e8 0x%08x\n",
+				 nv_rd32(dev, 0x6194e8));
+			return -EBUSY;
+		}
+	}
+
+	/* nfi what these are exactly, i do know that SOR_MODE_CTRL won't
+	 * work at all unless you do the SOR part below.
+	 */
+	for (i = 0; i < 3; i++) {
+		u32 dac = nv_rd32(dev, 0x61a000 + (i * 0x800));
+		nv_wr32(dev, 0x6101c0 + (i * 0x800), dac);
+	}
+
+	for (i = 0; i < 4; i++) {
+		u32 sor = nv_rd32(dev, 0x61c000 + (i * 0x800));
+		nv_wr32(dev, 0x6301c4 + (i * 0x800), sor);
+	}
+
+	for (i = 0; i < 2; i++) {
+		u32 crtc0 = nv_rd32(dev, 0x616104 + (i * 0x800));
+		u32 crtc1 = nv_rd32(dev, 0x616108 + (i * 0x800));
+		u32 crtc2 = nv_rd32(dev, 0x61610c + (i * 0x800));
+		nv_wr32(dev, 0x6101b4 + (i * 0x800), crtc0);
+		nv_wr32(dev, 0x6101b8 + (i * 0x800), crtc1);
+		nv_wr32(dev, 0x6101bc + (i * 0x800), crtc2);
+	}
+
+	/* point at our hash table / objects, enable interrupts */
+	nv_wr32(dev, 0x610010, (disp->mem->vinst >> 8) | 9);
+	nv_mask(dev, 0x6100b0, 0x00000307, 0x00000307);
+
+	/* init master */
+	nv_wr32(dev, 0x610494, (disp->evo[0].handle >> 8) | 3);
+	nv_wr32(dev, 0x610498, 0x00010000);
+	nv_wr32(dev, 0x61049c, 0x00000001);
+	nv_mask(dev, 0x610490, 0x00000010, 0x00000010);
+	nv_wr32(dev, 0x640000, 0x00000000);
+	nv_wr32(dev, 0x610490, 0x01000013);
+	if (!nv_wait(dev, 0x610490, 0x80000000, 0x00000000)) {
+		NV_ERROR(dev, "PDISP: master 0x%08x\n",
+			 nv_rd32(dev, 0x610490));
+		return -EBUSY;
+	}
+	nv_mask(dev, 0x610090, 0x00000001, 0x00000001);
+	nv_mask(dev, 0x6100a0, 0x00000001, 0x00000001);
+
+	/* init cursors */
+	for (i = 13; i <= 14; i++) {
+		nv_wr32(dev, 0x610490 + (i * 0x10), 0x00000001);
+		if (!nv_wait(dev, 0x610490 + (i * 0x10), 0x00010000, 0x00010000)) {
+			NV_ERROR(dev, "PDISP: curs%d 0x%08x\n", i,
+				 nv_rd32(dev, 0x610490 + (i * 0x10)));
+			return -EBUSY;
+		}
+
+		nv_mask(dev, 0x610090, 1 << i, 1 << i);
+		nv_mask(dev, 0x6100a0, 1 << i, 1 << i);
+	}
+
+	push = evo_wait(dev, 0, 32);
+	if (!push)
+		return -EBUSY;
+	evo_mthd(push, 0x0088, 1);
+	evo_data(push, NvEvoSync);
+	evo_mthd(push, 0x0084, 1);
+	evo_data(push, 0x00000000);
+	evo_mthd(push, 0x0084, 1);
+	evo_data(push, 0x80000000);
+	evo_mthd(push, 0x008c, 1);
+	evo_data(push, 0x00000000);
+	evo_kick(push, dev, 0);
+
+	return 0;
+}
+
+void
+nvd0_display_destroy(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvd0_display *disp = nvd0_display(dev);
+	struct pci_dev *pdev = dev->pdev;
+
+	nvd0_display_fini(dev);
+
+	pci_free_consistent(pdev, PAGE_SIZE, disp->evo[0].ptr, disp->evo[0].handle);
+	nouveau_gpuobj_ref(NULL, &disp->mem);
+	nouveau_irq_unregister(dev, 26);
+
+	dev_priv->engine.display.priv = NULL;
+	kfree(disp);
+}
+
+int
+nvd0_display_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
+	struct dcb_table *dcb = &dev_priv->vbios.dcb;
+	struct drm_connector *connector, *tmp;
+	struct pci_dev *pdev = dev->pdev;
+	struct nvd0_display *disp;
+	struct dcb_entry *dcbe;
+	int ret, i;
+
+	disp = kzalloc(sizeof(*disp), GFP_KERNEL);
+	if (!disp)
+		return -ENOMEM;
+	dev_priv->engine.display.priv = disp;
+
+	/* create crtc objects to represent the hw heads */
+	for (i = 0; i < 2; i++) {
+		ret = nvd0_crtc_create(dev, i);
+		if (ret)
+			goto out;
+	}
+
+	/* create encoder/connector objects based on VBIOS DCB table */
+	for (i = 0, dcbe = &dcb->entry[0]; i < dcb->entries; i++, dcbe++) {
+		connector = nouveau_connector_create(dev, dcbe->connector);
+		if (IS_ERR(connector))
+			continue;
+
+		if (dcbe->location != DCB_LOC_ON_CHIP) {
+			NV_WARN(dev, "skipping off-chip encoder %d/%d\n",
+				dcbe->type, ffs(dcbe->or) - 1);
+			continue;
+		}
+
+		switch (dcbe->type) {
+		case OUTPUT_TMDS:
+		case OUTPUT_LVDS:
+			nvd0_sor_create(connector, dcbe);
+			break;
+		case OUTPUT_ANALOG:
+			nvd0_dac_create(connector, dcbe);
+			break;
+		default:
+			NV_WARN(dev, "skipping unsupported encoder %d/%d\n",
+				dcbe->type, ffs(dcbe->or) - 1);
+			continue;
+		}
+	}
+
+	/* cull any connectors we created that don't have an encoder */
+	list_for_each_entry_safe(connector, tmp, &dev->mode_config.connector_list, head) {
+		if (connector->encoder_ids[0])
+			continue;
+
+		NV_WARN(dev, "%s has no encoders, removing\n",
+			drm_get_connector_name(connector));
+		connector->funcs->destroy(connector);
+	}
+
+	/* setup interrupt handling */
+	tasklet_init(&disp->tasklet, nvd0_display_bh, (unsigned long)dev);
+	nouveau_irq_register(dev, 26, nvd0_display_intr);
+
+	/* hash table and dma objects for the memory areas we care about */
+	ret = nouveau_gpuobj_new(dev, NULL, 0x4000, 0x10000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &disp->mem);
+	if (ret)
+		goto out;
+
+	nv_wo32(disp->mem, 0x1000, 0x00000049);
+	nv_wo32(disp->mem, 0x1004, (disp->mem->vinst + 0x2000) >> 8);
+	nv_wo32(disp->mem, 0x1008, (disp->mem->vinst + 0x2fff) >> 8);
+	nv_wo32(disp->mem, 0x100c, 0x00000000);
+	nv_wo32(disp->mem, 0x1010, 0x00000000);
+	nv_wo32(disp->mem, 0x1014, 0x00000000);
+	nv_wo32(disp->mem, 0x0000, NvEvoSync);
+	nv_wo32(disp->mem, 0x0004, (0x1000 << 9) | 0x00000001);
+
+	nv_wo32(disp->mem, 0x1020, 0x00000049);
+	nv_wo32(disp->mem, 0x1024, 0x00000000);
+	nv_wo32(disp->mem, 0x1028, (dev_priv->vram_size - 1) >> 8);
+	nv_wo32(disp->mem, 0x102c, 0x00000000);
+	nv_wo32(disp->mem, 0x1030, 0x00000000);
+	nv_wo32(disp->mem, 0x1034, 0x00000000);
+	nv_wo32(disp->mem, 0x0008, NvEvoVRAM);
+	nv_wo32(disp->mem, 0x000c, (0x1020 << 9) | 0x00000001);
+
+	nv_wo32(disp->mem, 0x1040, 0x00000009);
+	nv_wo32(disp->mem, 0x1044, 0x00000000);
+	nv_wo32(disp->mem, 0x1048, (dev_priv->vram_size - 1) >> 8);
+	nv_wo32(disp->mem, 0x104c, 0x00000000);
+	nv_wo32(disp->mem, 0x1050, 0x00000000);
+	nv_wo32(disp->mem, 0x1054, 0x00000000);
+	nv_wo32(disp->mem, 0x0010, NvEvoVRAM_LP);
+	nv_wo32(disp->mem, 0x0014, (0x1040 << 9) | 0x00000001);
+
+	nv_wo32(disp->mem, 0x1060, 0x0fe00009);
+	nv_wo32(disp->mem, 0x1064, 0x00000000);
+	nv_wo32(disp->mem, 0x1068, (dev_priv->vram_size - 1) >> 8);
+	nv_wo32(disp->mem, 0x106c, 0x00000000);
+	nv_wo32(disp->mem, 0x1070, 0x00000000);
+	nv_wo32(disp->mem, 0x1074, 0x00000000);
+	nv_wo32(disp->mem, 0x0018, NvEvoFB32);
+	nv_wo32(disp->mem, 0x001c, (0x1060 << 9) | 0x00000001);
+
+	pinstmem->flush(dev);
+
+	/* push buffers for evo channels */
+	disp->evo[0].ptr =
+		pci_alloc_consistent(pdev, PAGE_SIZE, &disp->evo[0].handle);
+	if (!disp->evo[0].ptr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = nvd0_display_init(dev);
+	if (ret)
+		goto out;
+
+out:
+	if (ret)
+		nvd0_display_destroy(dev);
+	return ret;
+}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index dc0a5b5..a72dbb3 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -935,6 +935,9 @@
 	WREG32(VM_CONTEXT1_CNTL, 0);
 
 	evergreen_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index cbf57d7..bf4fce7 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -999,6 +999,9 @@
 	WREG32(VM_CONTEXT1_CNTL, 0);
 
 	cayman_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index f2204cb..574f2c7 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -513,6 +513,9 @@
 	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
 	WREG32(RADEON_AIC_CNTL, tmp);
 	r100_pci_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 55a7f19..33f2b68 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -144,8 +144,9 @@
 	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
 	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
 	rv370_pcie_gart_tlb_flush(rdev);
-	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
-		 (unsigned)(rdev->mc.gtt_size >> 20), table_addr);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index aa5571b..334aee6 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -993,6 +993,9 @@
 		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 
 	r600_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 32807ba..0040d28 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1143,6 +1143,8 @@
 				struct drm_file *filp);
 int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
+int radeon_gem_wait_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
 
 /* VRAM scratch page for HDP bug */
 struct r700_vram_scratch {
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index fae00c0..f0b9066 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -80,6 +80,10 @@
 			p->relocs[i].lobj.wdomain = r->write_domain;
 			p->relocs[i].lobj.rdomain = r->read_domains;
 			p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
+			if (r->read_domains)
+				p->relocs[i].lobj.tv.usage |= TTM_USAGE_READ;
+			if (r->write_domain)
+				p->relocs[i].lobj.tv.usage |= TTM_USAGE_WRITE;
 			p->relocs[i].handle = r->handle;
 			p->relocs[i].flags = r->flags;
 			radeon_bo_list_add_object(&p->relocs[i].lobj,
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index e71d2ed..bd187e0 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -52,9 +52,10 @@
  *   2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query
  *   2.10.0 - fusion 2D tiling
  *   2.11.0 - backend map, initial compute support for the CS checker
+ *   2.12.0 - DRM_RADEON_GEM_WAIT ioctl
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	11
+#define KMS_DRIVER_MINOR	12
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index a533f52..fdc3a9a 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -142,7 +142,7 @@
 	u64 page_base;
 
 	if (!rdev->gart.ready) {
-		WARN(1, "trying to unbind memory to unitialized GART !\n");
+		WARN(1, "trying to unbind memory from uninitialized GART !\n");
 		return;
 	}
 	t = offset / RADEON_GPU_PAGE_SIZE;
@@ -174,7 +174,7 @@
 	int i, j;
 
 	if (!rdev->gart.ready) {
-		WARN(1, "trying to bind memory to unitialized GART !\n");
+		WARN(1, "trying to bind memory to uninitialized GART !\n");
 		return -EINVAL;
 	}
 	t = offset / RADEON_GPU_PAGE_SIZE;
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index aa1ca2d..2edc2a4 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -122,7 +122,7 @@
 	}
 	if (domain == RADEON_GEM_DOMAIN_CPU) {
 		/* Asking for cpu access wait for object idle */
-		r = radeon_bo_wait(robj, NULL, false);
+		r = radeon_bo_wait(robj, NULL, false, TTM_USAGE_READWRITE);
 		if (r) {
 			printk(KERN_ERR "Failed to wait for object !\n");
 			return r;
@@ -273,7 +273,7 @@
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
-	r = radeon_bo_wait(robj, &cur_placement, true);
+	r = radeon_bo_wait(robj, &cur_placement, true, TTM_USAGE_READWRITE);
 	switch (cur_placement) {
 	case TTM_PL_VRAM:
 		args->domain = RADEON_GEM_DOMAIN_VRAM;
@@ -303,7 +303,7 @@
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
-	r = radeon_bo_wait(robj, NULL, false);
+	r = radeon_bo_wait(robj, NULL, false, TTM_USAGE_READWRITE);
 	/* callback hw specific functions if any */
 	if (robj->rdev->asic->ioctl_wait_idle)
 		robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
@@ -311,6 +311,36 @@
 	return r;
 }
 
+int radeon_gem_wait_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp)
+{
+	struct drm_radeon_gem_wait *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_bo *robj;
+	bool no_wait = (args->flags & RADEON_GEM_NO_WAIT) != 0;
+	enum ttm_buffer_usage usage = 0;
+	int r;
+
+	if (args->flags & RADEON_GEM_USAGE_READ)
+		usage |= TTM_USAGE_READ;
+	if (args->flags & RADEON_GEM_USAGE_WRITE)
+		usage |= TTM_USAGE_WRITE;
+	if (!usage)
+		usage = TTM_USAGE_READWRITE;
+
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL) {
+		return -ENOENT;
+	}
+	robj = gem_to_radeon_bo(gobj);
+	r = radeon_bo_wait(robj, NULL, no_wait, usage);
+	/* callback hw specific functions if any */
+	if (!no_wait && robj->rdev->asic->ioctl_wait_idle)
+		robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
+	drm_gem_object_unreference_unlocked(gobj);
+	return r;
+}
+
 int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index be2c122..a749c26 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -451,5 +451,6 @@
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT, radeon_gem_wait_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index ede6c13..a057a8e 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -119,7 +119,7 @@
 }
 
 static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
-					bool no_wait)
+				 bool no_wait, enum ttm_buffer_usage usage)
 {
 	int r;
 
@@ -130,7 +130,7 @@
 	if (mem_type)
 		*mem_type = bo->tbo.mem.mem_type;
 	if (bo->tbo.sync_obj)
-		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
+		r = ttm_bo_wait(&bo->tbo, true, true, no_wait, usage);
 	spin_unlock(&bo->tbo.bdev->fence_lock);
 	ttm_bo_unreserve(&bo->tbo);
 	return r;
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index aa6a66e..89a6e1e 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -182,6 +182,9 @@
 	/* Enable gart */
 	WREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN | size_reg));
 	rs400_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 4b5d0e6..9320dd6 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -484,6 +484,9 @@
 	tmp = RREG32_MC(R_000009_MC_CNTL1);
 	WREG32_MC(R_000009_MC_CNTL1, (tmp | S_000009_ENABLE_PAGE_TABLES(1)));
 	rs600_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 4720d00..80928f9 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -161,6 +161,9 @@
 		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 
 	r600_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a4d38d8..b824d9b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -498,7 +498,7 @@
 	int ret;
 
 	spin_lock(&bdev->fence_lock);
-	(void) ttm_bo_wait(bo, false, false, true);
+	(void) ttm_bo_wait(bo, false, false, true, TTM_USAGE_READWRITE);
 	if (!bo->sync_obj) {
 
 		spin_lock(&glob->lru_lock);
@@ -566,7 +566,8 @@
 
 retry:
 	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
+			  TTM_USAGE_READWRITE);
 	spin_unlock(&bdev->fence_lock);
 
 	if (unlikely(ret != 0))
@@ -725,7 +726,8 @@
 	int ret = 0;
 
 	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
+			  TTM_USAGE_READWRITE);
 	spin_unlock(&bdev->fence_lock);
 
 	if (unlikely(ret != 0)) {
@@ -1072,7 +1074,8 @@
 	 * instead of doing it here.
 	 */
 	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
+			  TTM_USAGE_READWRITE);
 	spin_unlock(&bdev->fence_lock);
 	if (ret)
 		return ret;
@@ -1692,34 +1695,83 @@
 	return ret;
 }
 
+static void ttm_bo_unref_sync_obj_locked(struct ttm_buffer_object *bo,
+					 void *sync_obj,
+					 void **extra_sync_obj)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_bo_driver *driver = bdev->driver;
+	void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL;
+
+	/* We must unref the sync obj wherever it's ref'd.
+	 * Note that if we unref bo->sync_obj, we can unref both the read
+	 * and write sync objs too, because they can't be newer than
+	 * bo->sync_obj, so they are no longer relevant. */
+	if (sync_obj == bo->sync_obj ||
+	    sync_obj == bo->sync_obj_read) {
+		tmp_obj_read = bo->sync_obj_read;
+		bo->sync_obj_read = NULL;
+	}
+	if (sync_obj == bo->sync_obj ||
+	    sync_obj == bo->sync_obj_write) {
+		tmp_obj_write = bo->sync_obj_write;
+		bo->sync_obj_write = NULL;
+	}
+	if (sync_obj == bo->sync_obj) {
+		tmp_obj = bo->sync_obj;
+		bo->sync_obj = NULL;
+	}
+
+	clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+	spin_unlock(&bdev->fence_lock);
+	if (tmp_obj)
+		driver->sync_obj_unref(&tmp_obj);
+	if (tmp_obj_read)
+		driver->sync_obj_unref(&tmp_obj_read);
+	if (tmp_obj_write)
+		driver->sync_obj_unref(&tmp_obj_write);
+	if (extra_sync_obj)
+		driver->sync_obj_unref(extra_sync_obj);
+	spin_lock(&bdev->fence_lock);
+}
+
 int ttm_bo_wait(struct ttm_buffer_object *bo,
-		bool lazy, bool interruptible, bool no_wait)
+		bool lazy, bool interruptible, bool no_wait,
+		enum ttm_buffer_usage usage)
 {
 	struct ttm_bo_driver *driver = bo->bdev->driver;
 	struct ttm_bo_device *bdev = bo->bdev;
 	void *sync_obj;
 	void *sync_obj_arg;
 	int ret = 0;
+	void **bo_sync_obj;
 
-	if (likely(bo->sync_obj == NULL))
+	switch (usage) {
+	case TTM_USAGE_READ:
+		bo_sync_obj = &bo->sync_obj_read;
+		break;
+	case TTM_USAGE_WRITE:
+		bo_sync_obj = &bo->sync_obj_write;
+		break;
+	case TTM_USAGE_READWRITE:
+	default:
+		bo_sync_obj = &bo->sync_obj;
+	}
+
+	if (likely(*bo_sync_obj == NULL))
 		return 0;
 
-	while (bo->sync_obj) {
+	while (*bo_sync_obj) {
 
-		if (driver->sync_obj_signaled(bo->sync_obj, bo->sync_obj_arg)) {
-			void *tmp_obj = bo->sync_obj;
-			bo->sync_obj = NULL;
-			clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-			spin_unlock(&bdev->fence_lock);
-			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bdev->fence_lock);
+		if (driver->sync_obj_signaled(*bo_sync_obj, bo->sync_obj_arg)) {
+			ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, NULL);
 			continue;
 		}
 
 		if (no_wait)
 			return -EBUSY;
 
-		sync_obj = driver->sync_obj_ref(bo->sync_obj);
+		sync_obj = driver->sync_obj_ref(*bo_sync_obj);
 		sync_obj_arg = bo->sync_obj_arg;
 		spin_unlock(&bdev->fence_lock);
 		ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
@@ -1730,16 +1782,9 @@
 			return ret;
 		}
 		spin_lock(&bdev->fence_lock);
-		if (likely(bo->sync_obj == sync_obj &&
+		if (likely(*bo_sync_obj == sync_obj &&
 			   bo->sync_obj_arg == sync_obj_arg)) {
-			void *tmp_obj = bo->sync_obj;
-			bo->sync_obj = NULL;
-			clear_bit(TTM_BO_PRIV_FLAG_MOVING,
-				  &bo->priv_flags);
-			spin_unlock(&bdev->fence_lock);
-			driver->sync_obj_unref(&sync_obj);
-			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bdev->fence_lock);
+			ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, &sync_obj);
 		} else {
 			spin_unlock(&bdev->fence_lock);
 			driver->sync_obj_unref(&sync_obj);
@@ -1763,7 +1808,7 @@
 	if (unlikely(ret != 0))
 		return ret;
 	spin_lock(&bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, true, no_wait);
+	ret = ttm_bo_wait(bo, false, true, no_wait, TTM_USAGE_READWRITE);
 	spin_unlock(&bdev->fence_lock);
 	if (likely(ret == 0))
 		atomic_inc(&bo->cpu_writers);
@@ -1837,7 +1882,7 @@
 	 */
 
 	spin_lock(&bo->bdev->fence_lock);
-	ret = ttm_bo_wait(bo, false, false, false);
+	ret = ttm_bo_wait(bo, false, false, false, TTM_USAGE_READWRITE);
 	spin_unlock(&bo->bdev->fence_lock);
 
 	if (unlikely(ret != 0))
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index ae3c6f5..6135f58 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -436,6 +436,8 @@
 	atomic_set(&fbo->cpu_writers, 0);
 
 	fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj);
+	fbo->sync_obj_read = driver->sync_obj_ref(bo->sync_obj_read);
+	fbo->sync_obj_write = driver->sync_obj_ref(bo->sync_obj_write);
 	kref_init(&fbo->list_kref);
 	kref_init(&fbo->kref);
 	fbo->destroy = &ttm_transfered_destroy;
@@ -618,20 +620,30 @@
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int ret;
 	struct ttm_buffer_object *ghost_obj;
-	void *tmp_obj = NULL;
+	void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL;
 
 	spin_lock(&bdev->fence_lock);
-	if (bo->sync_obj) {
+	if (bo->sync_obj)
 		tmp_obj = bo->sync_obj;
-		bo->sync_obj = NULL;
-	}
+	if (bo->sync_obj_read)
+		tmp_obj_read = bo->sync_obj_read;
+	if (bo->sync_obj_write)
+		tmp_obj_write = bo->sync_obj_write;
+
 	bo->sync_obj = driver->sync_obj_ref(sync_obj);
+	bo->sync_obj_read = driver->sync_obj_ref(sync_obj);
+	bo->sync_obj_write = driver->sync_obj_ref(sync_obj);
 	bo->sync_obj_arg = sync_obj_arg;
 	if (evict) {
-		ret = ttm_bo_wait(bo, false, false, false);
+		ret = ttm_bo_wait(bo, false, false, false,
+				  TTM_USAGE_READWRITE);
 		spin_unlock(&bdev->fence_lock);
 		if (tmp_obj)
 			driver->sync_obj_unref(&tmp_obj);
+		if (tmp_obj_read)
+			driver->sync_obj_unref(&tmp_obj_read);
+		if (tmp_obj_write)
+			driver->sync_obj_unref(&tmp_obj_write);
 		if (ret)
 			return ret;
 
@@ -655,6 +667,10 @@
 		spin_unlock(&bdev->fence_lock);
 		if (tmp_obj)
 			driver->sync_obj_unref(&tmp_obj);
+		if (tmp_obj_read)
+			driver->sync_obj_unref(&tmp_obj_read);
+		if (tmp_obj_write)
+			driver->sync_obj_unref(&tmp_obj_write);
 
 		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
 		if (ret)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 221b924..ff1e26f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -122,7 +122,7 @@
 
 	spin_lock(&bdev->fence_lock);
 	if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
-		ret = ttm_bo_wait(bo, false, true, false);
+		ret = ttm_bo_wait(bo, false, true, false, TTM_USAGE_READWRITE);
 		spin_unlock(&bdev->fence_lock);
 		if (unlikely(ret != 0)) {
 			retval = (ret != -ERESTARTSYS) ?
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 3832fe1..36d111a 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -221,8 +221,18 @@
 
 	list_for_each_entry(entry, list, head) {
 		bo = entry->bo;
+		entry->old_sync_obj_read = NULL;
+		entry->old_sync_obj_write = NULL;
 		entry->old_sync_obj = bo->sync_obj;
 		bo->sync_obj = driver->sync_obj_ref(sync_obj);
+		if (entry->usage & TTM_USAGE_READ) {
+			entry->old_sync_obj_read = bo->sync_obj_read;
+			bo->sync_obj_read = driver->sync_obj_ref(sync_obj);
+		}
+		if (entry->usage & TTM_USAGE_WRITE) {
+			entry->old_sync_obj_write = bo->sync_obj_write;
+			bo->sync_obj_write = driver->sync_obj_ref(sync_obj);
+		}
 		bo->sync_obj_arg = entry->new_sync_obj_arg;
 		ttm_bo_unreserve_locked(bo);
 		entry->reserved = false;
@@ -231,8 +241,15 @@
 	spin_unlock(&bdev->fence_lock);
 
 	list_for_each_entry(entry, list, head) {
-		if (entry->old_sync_obj)
+		if (entry->old_sync_obj) {
 			driver->sync_obj_unref(&entry->old_sync_obj);
+		}
+		if (entry->old_sync_obj_read) {
+			driver->sync_obj_unref(&entry->old_sync_obj_read);
+		}
+		if (entry->old_sync_obj_write) {
+			driver->sync_obj_unref(&entry->old_sync_obj_write);
+		}
 	}
 }
 EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index c9281a1..7d8e9d5 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -4,6 +4,7 @@
 vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
 	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \
 	    vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
-	    vmwgfx_overlay.o vmwgfx_fence.o vmwgfx_gmrid_manager.o
+	    vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \
+	    vmwgfx_fence.o
 
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/svga_reg.h b/drivers/gpu/drm/vmwgfx/svga_reg.h
index 1b96c2e..ec5aad9 100644
--- a/drivers/gpu/drm/vmwgfx/svga_reg.h
+++ b/drivers/gpu/drm/vmwgfx/svga_reg.h
@@ -39,6 +39,15 @@
 #define PCI_DEVICE_ID_VMWARE_SVGA2      0x0405
 
 /*
+ * SVGA_REG_ENABLE bit definitions.
+ */
+#define SVGA_REG_ENABLE_DISABLE     0
+#define SVGA_REG_ENABLE_ENABLE      1
+#define SVGA_REG_ENABLE_HIDE        2
+#define SVGA_REG_ENABLE_ENABLE_HIDE (SVGA_REG_ENABLE_ENABLE |\
+				     SVGA_REG_ENABLE_HIDE)
+
+/*
  * Legal values for the SVGA_REG_CURSOR_ON register in old-fashioned
  * cursor bypass mode. This is still supported, but no new guest
  * drivers should use it.
@@ -158,7 +167,9 @@
    SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44,
 
    SVGA_REG_TRACES = 45,            /* Enable trace-based updates even when FIFO is on */
-   SVGA_REG_TOP = 46,               /* Must be 1 more than the last register */
+   SVGA_REG_GMRS_MAX_PAGES = 46,    /* Maximum number of 4KB pages for all GMRs */
+   SVGA_REG_MEMORY_SIZE = 47,       /* Total dedicated device memory excluding FIFO */
+   SVGA_REG_TOP = 48,               /* Must be 1 more than the last register */
 
    SVGA_PALETTE_BASE = 1024,        /* Base of SVGA color map */
    /* Next 768 (== 256*3) registers exist for colormap */
@@ -370,6 +381,15 @@
  *  Note the holes in the bitfield. Missing bits have been deprecated,
  *  and must not be reused. Those capabilities will never be reported
  *  by new versions of the SVGA device.
+ *
+ * SVGA_CAP_GMR2 --
+ *    Provides asynchronous commands to define and remap guest memory
+ *    regions.  Adds device registers SVGA_REG_GMRS_MAX_PAGES and
+ *    SVGA_REG_MEMORY_SIZE.
+ *
+ * SVGA_CAP_SCREEN_OBJECT_2 --
+ *    Allow screen object support, and require backing stores from the
+ *    guest for each screen object.
  */
 
 #define SVGA_CAP_NONE               0x00000000
@@ -387,6 +407,8 @@
 #define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   // Legacy multi-monitor support
 #define SVGA_CAP_GMR                0x00100000
 #define SVGA_CAP_TRACES             0x00200000
+#define SVGA_CAP_GMR2               0x00400000
+#define SVGA_CAP_SCREEN_OBJECT_2    0x00800000
 
 
 /*
@@ -885,6 +907,8 @@
    SVGA_CMD_BLIT_SCREEN_TO_GMRFB  = 38,
    SVGA_CMD_ANNOTATION_FILL       = 39,
    SVGA_CMD_ANNOTATION_COPY       = 40,
+   SVGA_CMD_DEFINE_GMR2           = 41,
+   SVGA_CMD_REMAP_GMR2            = 42,
    SVGA_CMD_MAX
 } SVGAFifoCmdId;
 
@@ -1343,4 +1367,74 @@
    uint32           srcScreenId;
 } SVGAFifoCmdAnnotationCopy;
 
+
+/*
+ * SVGA_CMD_DEFINE_GMR2 --
+ *
+ *    Define guest memory region v2.  See the description of GMRs above.
+ *
+ * Availability:
+ *    SVGA_CAP_GMR2
+ */
+
+typedef
+struct {
+   uint32 gmrId;
+   uint32 numPages;
+}
+SVGAFifoCmdDefineGMR2;
+
+
+/*
+ * SVGA_CMD_REMAP_GMR2 --
+ *
+ *    Remap guest memory region v2.  See the description of GMRs above.
+ *
+ *    This command allows guest to modify a portion of an existing GMR by
+ *    invalidating it or reassigning it to different guest physical pages.
+ *    The pages are identified by physical page number (PPN).  The pages
+ *    are assumed to be pinned and valid for DMA operations.
+ *
+ *    Description of command flags:
+ *
+ *    SVGA_REMAP_GMR2_VIA_GMR: If enabled, references a PPN list in a GMR.
+ *       The PPN list must not overlap with the remap region (this can be
+ *       handled trivially by referencing a separate GMR).  If flag is
+ *       disabled, PPN list is appended to SVGARemapGMR command.
+ *
+ *    SVGA_REMAP_GMR2_PPN64: If set, PPN list is in PPN64 format, otherwise
+ *       it is in PPN32 format.
+ *
+ *    SVGA_REMAP_GMR2_SINGLE_PPN: If set, PPN list contains a single entry.
+ *       A single PPN can be used to invalidate a portion of a GMR or
+ *       map it to to a single guest scratch page.
+ *
+ * Availability:
+ *    SVGA_CAP_GMR2
+ */
+
+typedef enum {
+   SVGA_REMAP_GMR2_PPN32         = 0,
+   SVGA_REMAP_GMR2_VIA_GMR       = (1 << 0),
+   SVGA_REMAP_GMR2_PPN64         = (1 << 1),
+   SVGA_REMAP_GMR2_SINGLE_PPN    = (1 << 2),
+} SVGARemapGMR2Flags;
+
+typedef
+struct {
+   uint32 gmrId;
+   SVGARemapGMR2Flags flags;
+	uint32 offsetPages; /* offset in pages to begin remap */
+	uint32 numPages; /* number of pages to remap */
+   /*
+    * Followed by additional data depending on SVGARemapGMR2Flags.
+    *
+    * If flag SVGA_REMAP_GMR2_VIA_GMR is set, single SVGAGuestPtr follows.
+    * Otherwise an array of page descriptors in PPN32 or PPN64 format
+    * (according to flag SVGA_REMAP_GMR2_PPN64) follows.  If flag
+    * SVGA_REMAP_GMR2_SINGLE_PPN is set, array contains a single entry.
+    */
+}
+SVGAFifoCmdRemapGMR2;
+
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 87e43e0..5d665ce 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -274,39 +274,39 @@
 
 static void *vmw_sync_obj_ref(void *sync_obj)
 {
-	return sync_obj;
+
+	return (void *)
+		vmw_fence_obj_reference((struct vmw_fence_obj *) sync_obj);
 }
 
 static void vmw_sync_obj_unref(void **sync_obj)
 {
-	*sync_obj = NULL;
+	vmw_fence_obj_unreference((struct vmw_fence_obj **) sync_obj);
 }
 
 static int vmw_sync_obj_flush(void *sync_obj, void *sync_arg)
 {
-	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-
-	mutex_lock(&dev_priv->hw_mutex);
-	vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
-	mutex_unlock(&dev_priv->hw_mutex);
+	vmw_fence_obj_flush((struct vmw_fence_obj *) sync_obj);
 	return 0;
 }
 
 static bool vmw_sync_obj_signaled(void *sync_obj, void *sync_arg)
 {
-	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-	uint32_t sequence = (unsigned long) sync_obj;
+	unsigned long flags = (unsigned long) sync_arg;
+	return	vmw_fence_obj_signaled((struct vmw_fence_obj *) sync_obj,
+				       (uint32_t) flags);
 
-	return vmw_fence_signaled(dev_priv, sequence);
 }
 
 static int vmw_sync_obj_wait(void *sync_obj, void *sync_arg,
 			     bool lazy, bool interruptible)
 {
-	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-	uint32_t sequence = (unsigned long) sync_obj;
+	unsigned long flags = (unsigned long) sync_arg;
 
-	return vmw_wait_fence(dev_priv, false, sequence, false, 3*HZ);
+	return vmw_fence_obj_wait((struct vmw_fence_obj *) sync_obj,
+				  (uint32_t) flags,
+				  lazy, interruptible,
+				  VMW_FENCE_WAIT_TIMEOUT);
 }
 
 struct ttm_bo_driver vmw_bo_driver = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 96949b9..d4829cb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -82,16 +82,18 @@
 #define DRM_IOCTL_VMW_EXECBUF					\
 	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_EXECBUF,		\
 		struct drm_vmw_execbuf_arg)
-#define DRM_IOCTL_VMW_FIFO_DEBUG				\
-	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FIFO_DEBUG,		\
-		 struct drm_vmw_fifo_debug_arg)
+#define DRM_IOCTL_VMW_GET_3D_CAP				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_GET_3D_CAP,		\
+		 struct drm_vmw_get_3d_cap_arg)
 #define DRM_IOCTL_VMW_FENCE_WAIT				\
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FENCE_WAIT,		\
 		 struct drm_vmw_fence_wait_arg)
-#define DRM_IOCTL_VMW_UPDATE_LAYOUT				\
-	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_UPDATE_LAYOUT,	\
-		 struct drm_vmw_update_layout_arg)
-
+#define DRM_IOCTL_VMW_FENCE_SIGNALED				\
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FENCE_SIGNALED,	\
+		 struct drm_vmw_fence_signaled_arg)
+#define DRM_IOCTL_VMW_FENCE_UNREF				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_FENCE_UNREF,		\
+		 struct drm_vmw_fence_arg)
 
 /**
  * The core DRM version of this macro doesn't account for
@@ -135,12 +137,15 @@
 		      DRM_AUTH | DRM_UNLOCKED),
 	VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl,
-		      DRM_AUTH | DRM_ROOT_ONLY | DRM_MASTER | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_wait_ioctl,
+	VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl,
-		      DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED)
+	VMW_IOCTL_DEF(VMW_FENCE_SIGNALED,
+		      vmw_fence_obj_signaled_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
 };
 
 static struct pci_device_id vmw_pci_id_list[] = {
@@ -189,6 +194,10 @@
 		DRM_INFO("  GMR.\n");
 	if (capabilities & SVGA_CAP_TRACES)
 		DRM_INFO("  Traces.\n");
+	if (capabilities & SVGA_CAP_GMR2)
+		DRM_INFO("  GMR2.\n");
+	if (capabilities & SVGA_CAP_SCREEN_OBJECT_2)
+		DRM_INFO("  Screen Object 2.\n");
 }
 
 static int vmw_request_device(struct vmw_private *dev_priv)
@@ -200,16 +209,25 @@
 		DRM_ERROR("Unable to initialize FIFO.\n");
 		return ret;
 	}
+	vmw_fence_fifo_up(dev_priv->fman);
 
 	return 0;
 }
 
 static void vmw_release_device(struct vmw_private *dev_priv)
 {
+	vmw_fence_fifo_down(dev_priv->fman);
 	vmw_fifo_release(dev_priv, &dev_priv->fifo);
 }
 
-int vmw_3d_resource_inc(struct vmw_private *dev_priv)
+/**
+ * Increase the 3d resource refcount.
+ * If the count was prevously zero, initialize the fifo, switching to svga
+ * mode. Note that the master holds a ref as well, and may request an
+ * explicit switch to svga mode if fb is not running, using @unhide_svga.
+ */
+int vmw_3d_resource_inc(struct vmw_private *dev_priv,
+			bool unhide_svga)
 {
 	int ret = 0;
 
@@ -218,19 +236,42 @@
 		ret = vmw_request_device(dev_priv);
 		if (unlikely(ret != 0))
 			--dev_priv->num_3d_resources;
+	} else if (unhide_svga) {
+		mutex_lock(&dev_priv->hw_mutex);
+		vmw_write(dev_priv, SVGA_REG_ENABLE,
+			  vmw_read(dev_priv, SVGA_REG_ENABLE) &
+			  ~SVGA_REG_ENABLE_HIDE);
+		mutex_unlock(&dev_priv->hw_mutex);
 	}
+
 	mutex_unlock(&dev_priv->release_mutex);
 	return ret;
 }
 
-
-void vmw_3d_resource_dec(struct vmw_private *dev_priv)
+/**
+ * Decrease the 3d resource refcount.
+ * If the count reaches zero, disable the fifo, switching to vga mode.
+ * Note that the master holds a refcount as well, and may request an
+ * explicit switch to vga mode when it releases its refcount to account
+ * for the situation of an X server vt switch to VGA with 3d resources
+ * active.
+ */
+void vmw_3d_resource_dec(struct vmw_private *dev_priv,
+			 bool hide_svga)
 {
 	int32_t n3d;
 
 	mutex_lock(&dev_priv->release_mutex);
 	if (unlikely(--dev_priv->num_3d_resources == 0))
 		vmw_release_device(dev_priv);
+	else if (hide_svga) {
+		mutex_lock(&dev_priv->hw_mutex);
+		vmw_write(dev_priv, SVGA_REG_ENABLE,
+			  vmw_read(dev_priv, SVGA_REG_ENABLE) |
+			  SVGA_REG_ENABLE_HIDE);
+		mutex_unlock(&dev_priv->hw_mutex);
+	}
+
 	n3d = (int32_t) dev_priv->num_3d_resources;
 	mutex_unlock(&dev_priv->release_mutex);
 
@@ -252,7 +293,7 @@
 
 	dev_priv->dev = dev;
 	dev_priv->vmw_chipset = chipset;
-	dev_priv->last_read_sequence = (uint32_t) -100;
+	dev_priv->last_read_seqno = (uint32_t) -100;
 	mutex_init(&dev_priv->hw_mutex);
 	mutex_init(&dev_priv->cmdbuf_mutex);
 	mutex_init(&dev_priv->release_mutex);
@@ -263,7 +304,7 @@
 	mutex_init(&dev_priv->init_mutex);
 	init_waitqueue_head(&dev_priv->fence_queue);
 	init_waitqueue_head(&dev_priv->fifo_queue);
-	atomic_set(&dev_priv->fence_queue_waiters, 0);
+	dev_priv->fence_queue_waiters = 0;
 	atomic_set(&dev_priv->fifo_queue_waiters, 0);
 
 	dev_priv->io_start = pci_resource_start(dev->pdev, 0);
@@ -292,6 +333,12 @@
 		dev_priv->max_gmr_ids =
 			vmw_read(dev_priv, SVGA_REG_GMR_MAX_IDS);
 	}
+	if (dev_priv->capabilities & SVGA_CAP_GMR2) {
+		dev_priv->max_gmr_pages =
+			vmw_read(dev_priv, SVGA_REG_GMRS_MAX_PAGES);
+		dev_priv->memory_size =
+			vmw_read(dev_priv, SVGA_REG_MEMORY_SIZE);
+	}
 
 	dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE);
 	dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE);
@@ -308,6 +355,12 @@
 		DRM_INFO("Max GMR descriptors is %u\n",
 			 (unsigned)dev_priv->max_gmr_descriptors);
 	}
+	if (dev_priv->capabilities & SVGA_CAP_GMR2) {
+		DRM_INFO("Max number of GMR pages is %u\n",
+			 (unsigned)dev_priv->max_gmr_pages);
+		DRM_INFO("Max dedicated hypervisor graphics memory is %u\n",
+			 (unsigned)dev_priv->memory_size);
+	}
 	DRM_INFO("VRAM at 0x%08x size is %u kiB\n",
 		 dev_priv->vram_start, dev_priv->vram_size / 1024);
 	DRM_INFO("MMIO at 0x%08x size is %u kiB\n",
@@ -394,12 +447,16 @@
 			goto out_no_device;
 		}
 	}
+
+	dev_priv->fman = vmw_fence_manager_init(dev_priv);
+	if (unlikely(dev_priv->fman == NULL))
+		goto out_no_fman;
 	ret = vmw_kms_init(dev_priv);
 	if (unlikely(ret != 0))
 		goto out_no_kms;
 	vmw_overlay_init(dev_priv);
 	if (dev_priv->enable_fb) {
-		ret = vmw_3d_resource_inc(dev_priv);
+		ret = vmw_3d_resource_inc(dev_priv, false);
 		if (unlikely(ret != 0))
 			goto out_no_fifo;
 		vmw_kms_save_vga(dev_priv);
@@ -429,12 +486,14 @@
 	if (dev_priv->enable_fb) {
 		vmw_fb_close(dev_priv);
 		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv);
+		vmw_3d_resource_dec(dev_priv, false);
 	}
 out_no_fifo:
 	vmw_overlay_close(dev_priv);
 	vmw_kms_close(dev_priv);
 out_no_kms:
+	vmw_fence_manager_takedown(dev_priv->fman);
+out_no_fman:
 	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
 	else
@@ -467,15 +526,18 @@
 
 	unregister_pm_notifier(&dev_priv->pm_nb);
 
+	if (dev_priv->ctx.cmd_bounce)
+		vfree(dev_priv->ctx.cmd_bounce);
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
 		drm_irq_uninstall(dev_priv->dev);
 	if (dev_priv->enable_fb) {
 		vmw_fb_close(dev_priv);
 		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv);
+		vmw_3d_resource_dec(dev_priv, false);
 	}
 	vmw_kms_close(dev_priv);
 	vmw_overlay_close(dev_priv);
+	vmw_fence_manager_takedown(dev_priv->fman);
 	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
 	else
@@ -646,7 +708,7 @@
 	int ret = 0;
 
 	if (!dev_priv->enable_fb) {
-		ret = vmw_3d_resource_inc(dev_priv);
+		ret = vmw_3d_resource_inc(dev_priv, true);
 		if (unlikely(ret != 0))
 			return ret;
 		vmw_kms_save_vga(dev_priv);
@@ -688,7 +750,7 @@
 		vmw_write(dev_priv, SVGA_REG_TRACES, 1);
 		mutex_unlock(&dev_priv->hw_mutex);
 		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv);
+		vmw_3d_resource_dec(dev_priv, true);
 	}
 	return ret;
 }
@@ -726,7 +788,7 @@
 		vmw_write(dev_priv, SVGA_REG_TRACES, 1);
 		mutex_unlock(&dev_priv->hw_mutex);
 		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv);
+		vmw_3d_resource_dec(dev_priv, true);
 	}
 
 	dev_priv->active_master = &dev_priv->fbdev_master;
@@ -835,7 +897,7 @@
 	 */
 	dev_priv->suspended = true;
 	if (dev_priv->enable_fb)
-		vmw_3d_resource_dec(dev_priv);
+			vmw_3d_resource_dec(dev_priv, true);
 
 	if (dev_priv->num_3d_resources != 0) {
 
@@ -843,7 +905,7 @@
 			 "while 3D resources are active.\n");
 
 		if (dev_priv->enable_fb)
-			vmw_3d_resource_inc(dev_priv);
+			vmw_3d_resource_inc(dev_priv, true);
 		dev_priv->suspended = false;
 		return -EBUSY;
 	}
@@ -862,7 +924,7 @@
 	 * start fifo.
 	 */
 	if (dev_priv->enable_fb)
-		vmw_3d_resource_inc(dev_priv);
+			vmw_3d_resource_inc(dev_priv, false);
 
 	dev_priv->suspended = false;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 10fc01f..564a815 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -38,20 +38,27 @@
 #include "ttm/ttm_lock.h"
 #include "ttm/ttm_execbuf_util.h"
 #include "ttm/ttm_module.h"
+#include "vmwgfx_fence.h"
 
-#define VMWGFX_DRIVER_DATE "20100927"
-#define VMWGFX_DRIVER_MAJOR 1
-#define VMWGFX_DRIVER_MINOR 4
+#define VMWGFX_DRIVER_DATE "20110901"
+#define VMWGFX_DRIVER_MAJOR 2
+#define VMWGFX_DRIVER_MINOR 0
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
-#define VMWGFX_MAX_GMRS 2048
+#define VMWGFX_MAX_VALIDATIONS 2048
 #define VMWGFX_MAX_DISPLAYS 16
+#define VMWGFX_CMD_BOUNCE_INIT_SIZE 32768
 
 #define VMW_PL_GMR TTM_PL_PRIV0
 #define VMW_PL_FLAG_GMR TTM_PL_FLAG_PRIV0
 
+#define VMW_RES_CONTEXT ttm_driver_type0
+#define VMW_RES_SURFACE ttm_driver_type1
+#define VMW_RES_STREAM ttm_driver_type2
+#define VMW_RES_FENCE ttm_driver_type3
+
 struct vmw_fpriv {
 	struct drm_master *locked_master;
 	struct ttm_object_file *tfile;
@@ -74,7 +81,7 @@
 	bool avail;
 	void (*hw_destroy) (struct vmw_resource *res);
 	void (*res_free) (struct vmw_resource *res);
-
+	bool on_validate_list;
 	/* TODO is a generic snooper needed? */
 #if 0
 	void (*snoop)(struct vmw_resource *res,
@@ -104,7 +111,7 @@
 	struct vmw_cursor_snooper snooper;
 };
 
-struct vmw_fence_queue {
+struct vmw_marker_queue {
 	struct list_head head;
 	struct timespec lag;
 	struct timespec lag_time;
@@ -115,16 +122,12 @@
 	unsigned long reserved_size;
 	__le32 *dynamic_buffer;
 	__le32 *static_buffer;
-	__le32 *last_buffer;
-	uint32_t last_data_size;
-	uint32_t last_buffer_size;
-	bool last_buffer_add;
 	unsigned long static_buffer_size;
 	bool using_bounce_buffer;
 	uint32_t capabilities;
 	struct mutex fifo_mutex;
 	struct rw_semaphore rwsem;
-	struct vmw_fence_queue fence_queue;
+	struct vmw_marker_queue marker_queue;
 };
 
 struct vmw_relocation {
@@ -143,8 +146,12 @@
 	struct list_head validate_nodes;
 	struct vmw_relocation relocs[VMWGFX_MAX_RELOCATIONS];
 	uint32_t cur_reloc;
-	struct ttm_validate_buffer val_bufs[VMWGFX_MAX_GMRS];
+	struct ttm_validate_buffer val_bufs[VMWGFX_MAX_VALIDATIONS];
 	uint32_t cur_val_buf;
+	uint32_t *cmd_bounce;
+	uint32_t cmd_bounce_size;
+	struct vmw_resource *resources[VMWGFX_MAX_VALIDATIONS];
+	uint32_t num_ref_resources;
 };
 
 struct vmw_legacy_display;
@@ -185,6 +192,8 @@
 	uint32_t capabilities;
 	uint32_t max_gmr_descriptors;
 	uint32_t max_gmr_ids;
+	uint32_t max_gmr_pages;
+	uint32_t memory_size;
 	bool has_gmr;
 	struct mutex hw_mutex;
 
@@ -195,12 +204,7 @@
 	struct vmw_vga_topology_state vga_save[VMWGFX_MAX_DISPLAYS];
 	uint32_t vga_width;
 	uint32_t vga_height;
-	uint32_t vga_depth;
 	uint32_t vga_bpp;
-	uint32_t vga_pseudo;
-	uint32_t vga_red_mask;
-	uint32_t vga_green_mask;
-	uint32_t vga_blue_mask;
 	uint32_t vga_bpl;
 	uint32_t vga_pitchlock;
 
@@ -240,13 +244,14 @@
 	 * Fencing and IRQs.
 	 */
 
-	atomic_t fence_seq;
+	atomic_t marker_seq;
 	wait_queue_head_t fence_queue;
 	wait_queue_head_t fifo_queue;
-	atomic_t fence_queue_waiters;
+	int fence_queue_waiters; /* Protected by hw_mutex */
 	atomic_t fifo_queue_waiters;
-	uint32_t last_read_sequence;
+	uint32_t last_read_seqno;
 	spinlock_t irq_lock;
+	struct vmw_fence_manager *fman;
 
 	/*
 	 * Device state
@@ -319,8 +324,8 @@
 	return val;
 }
 
-int vmw_3d_resource_inc(struct vmw_private *dev_priv);
-void vmw_3d_resource_dec(struct vmw_private *dev_priv);
+int vmw_3d_resource_inc(struct vmw_private *dev_priv, bool unhide_svga);
+void vmw_3d_resource_dec(struct vmw_private *dev_priv, bool hide_svga);
 
 /**
  * GMR utilities - vmwgfx_gmr.c
@@ -345,7 +350,8 @@
 				    struct drm_file *file_priv);
 extern int vmw_context_check(struct vmw_private *dev_priv,
 			     struct ttm_object_file *tfile,
-			     int id);
+			     int id,
+			     struct vmw_resource **p_res);
 extern void vmw_surface_res_free(struct vmw_resource *res);
 extern int vmw_surface_init(struct vmw_private *dev_priv,
 			    struct vmw_surface *srf,
@@ -398,7 +404,7 @@
 
 extern int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
-extern int vmw_fifo_debug_ioctl(struct drm_device *dev, void *data,
+extern int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 
 /**
@@ -412,9 +418,8 @@
 extern void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes);
 extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes);
 extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
-			       uint32_t *sequence);
+			       uint32_t *seqno);
 extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
-extern int vmw_fifo_mmap(struct file *filp, struct vm_area_struct *vma);
 extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
 extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
 
@@ -450,39 +455,38 @@
  */
 
 extern irqreturn_t vmw_irq_handler(DRM_IRQ_ARGS);
-extern int vmw_wait_fence(struct vmw_private *dev_priv, bool lazy,
-			  uint32_t sequence, bool interruptible,
-			  unsigned long timeout);
+extern int vmw_wait_seqno(struct vmw_private *dev_priv, bool lazy,
+			     uint32_t seqno, bool interruptible,
+			     unsigned long timeout);
 extern void vmw_irq_preinstall(struct drm_device *dev);
 extern int vmw_irq_postinstall(struct drm_device *dev);
 extern void vmw_irq_uninstall(struct drm_device *dev);
-extern bool vmw_fence_signaled(struct vmw_private *dev_priv,
-			       uint32_t sequence);
-extern int vmw_fence_wait_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
+extern bool vmw_seqno_passed(struct vmw_private *dev_priv,
+				uint32_t seqno);
 extern int vmw_fallback_wait(struct vmw_private *dev_priv,
 			     bool lazy,
 			     bool fifo_idle,
-			     uint32_t sequence,
+			     uint32_t seqno,
 			     bool interruptible,
 			     unsigned long timeout);
-extern void vmw_update_sequence(struct vmw_private *dev_priv,
+extern void vmw_update_seqno(struct vmw_private *dev_priv,
 				struct vmw_fifo_state *fifo_state);
-
+extern void vmw_seqno_waiter_add(struct vmw_private *dev_priv);
+extern void vmw_seqno_waiter_remove(struct vmw_private *dev_priv);
 
 /**
- * Rudimentary fence objects currently used only for throttling -
- * vmwgfx_fence.c
+ * Rudimentary fence-like objects currently used only for throttling -
+ * vmwgfx_marker.c
  */
 
-extern void vmw_fence_queue_init(struct vmw_fence_queue *queue);
-extern void vmw_fence_queue_takedown(struct vmw_fence_queue *queue);
-extern int vmw_fence_push(struct vmw_fence_queue *queue,
-			  uint32_t sequence);
-extern int vmw_fence_pull(struct vmw_fence_queue *queue,
-			  uint32_t signaled_sequence);
+extern void vmw_marker_queue_init(struct vmw_marker_queue *queue);
+extern void vmw_marker_queue_takedown(struct vmw_marker_queue *queue);
+extern int vmw_marker_push(struct vmw_marker_queue *queue,
+			  uint32_t seqno);
+extern int vmw_marker_pull(struct vmw_marker_queue *queue,
+			  uint32_t signaled_seqno);
 extern int vmw_wait_lag(struct vmw_private *dev_priv,
-			struct vmw_fence_queue *queue, uint32_t us);
+			struct vmw_marker_queue *queue, uint32_t us);
 
 /**
  * Kernel framebuffer - vmwgfx_fb.c
@@ -508,11 +512,9 @@
 			  struct ttm_object_file *tfile,
 			  struct ttm_buffer_object *bo,
 			  SVGA3dCmdHeader *header);
-void vmw_kms_write_svga(struct vmw_private *vmw_priv,
-			unsigned width, unsigned height, unsigned pitch,
-			unsigned bbp, unsigned depth);
-int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
+int vmw_kms_write_svga(struct vmw_private *vmw_priv,
+		       unsigned width, unsigned height, unsigned pitch,
+		       unsigned bpp, unsigned depth);
 void vmw_kms_idle_workqueues(struct vmw_master *vmaster);
 bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
 				uint32_t pitch,
@@ -576,4 +578,8 @@
 	return NULL;
 }
 
+static inline struct ttm_mem_global *vmw_mem_glob(struct vmw_private *dev_priv)
+{
+	return (struct ttm_mem_global *) dev_priv->mem_global_ref.object;
+}
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 41b95ed..fa26e64 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -44,10 +44,36 @@
 	return 0;
 }
 
+
+static int vmw_resource_to_validate_list(struct vmw_sw_context *sw_context,
+					 struct vmw_resource **p_res)
+{
+	int ret = 0;
+	struct vmw_resource *res = *p_res;
+
+	if (!res->on_validate_list) {
+		if (sw_context->num_ref_resources >= VMWGFX_MAX_VALIDATIONS) {
+			DRM_ERROR("Too many resources referenced in "
+				  "command stream.\n");
+			ret = -ENOMEM;
+			goto out;
+		}
+		sw_context->resources[sw_context->num_ref_resources++] = res;
+		res->on_validate_list = true;
+		return 0;
+	}
+
+out:
+	vmw_resource_unreference(p_res);
+	return ret;
+}
+
 static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
 			     SVGA3dCmdHeader *header)
 {
+	struct vmw_resource *ctx;
+
 	struct vmw_cid_cmd {
 		SVGA3dCmdHeader header;
 		__le32 cid;
@@ -58,7 +84,8 @@
 	if (likely(sw_context->cid_valid && cmd->cid == sw_context->last_cid))
 		return 0;
 
-	ret = vmw_context_check(dev_priv, sw_context->tfile, cmd->cid);
+	ret = vmw_context_check(dev_priv, sw_context->tfile, cmd->cid,
+				&ctx);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use context %u\n",
 			  (unsigned) cmd->cid);
@@ -67,39 +94,43 @@
 
 	sw_context->last_cid = cmd->cid;
 	sw_context->cid_valid = true;
-
-	return 0;
+	return vmw_resource_to_validate_list(sw_context, &ctx);
 }
 
 static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
 			     uint32_t *sid)
 {
+	struct vmw_surface *srf;
+	int ret;
+	struct vmw_resource *res;
+
 	if (*sid == SVGA3D_INVALID_ID)
 		return 0;
 
-	if (unlikely((!sw_context->sid_valid  ||
-		      *sid != sw_context->last_sid))) {
-		int real_id;
-		int ret = vmw_surface_check(dev_priv, sw_context->tfile,
-					    *sid, &real_id);
-
-		if (unlikely(ret != 0)) {
-			DRM_ERROR("Could ot find or use surface 0x%08x "
-				  "address 0x%08lx\n",
-				  (unsigned int) *sid,
-				  (unsigned long) sid);
-			return ret;
-		}
-
-		sw_context->last_sid = *sid;
-		sw_context->sid_valid = true;
-		*sid = real_id;
-		sw_context->sid_translation = real_id;
-	} else
+	if (likely((sw_context->sid_valid  &&
+		      *sid == sw_context->last_sid))) {
 		*sid = sw_context->sid_translation;
+		return 0;
+	}
 
-	return 0;
+	ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile,
+					     *sid, &srf);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Could ot find or use surface 0x%08x "
+			  "address 0x%08lx\n",
+			  (unsigned int) *sid,
+			  (unsigned long) sid);
+		return ret;
+	}
+
+	sw_context->last_sid = *sid;
+	sw_context->sid_valid = true;
+	sw_context->sid_translation = srf->res.id;
+	*sid = sw_context->sid_translation;
+
+	res = &srf->res;
+	return vmw_resource_to_validate_list(sw_context, &res);
 }
 
 
@@ -213,7 +244,7 @@
 	reloc->location = ptr;
 
 	cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
-	if (unlikely(cur_validate_node >= VMWGFX_MAX_GMRS)) {
+	if (unlikely(cur_validate_node >= VMWGFX_MAX_VALIDATIONS)) {
 		DRM_ERROR("Max number of DMA buffers per submission"
 			  " exceeded.\n");
 		ret = -EINVAL;
@@ -224,7 +255,8 @@
 	if (unlikely(cur_validate_node == sw_context->cur_val_buf)) {
 		val_buf = &sw_context->val_bufs[cur_validate_node];
 		val_buf->bo = ttm_bo_reference(bo);
-		val_buf->new_sync_obj_arg = (void *) dev_priv;
+		val_buf->usage = TTM_USAGE_READWRITE;
+		val_buf->new_sync_obj_arg = (void *) DRM_VMW_FENCE_FLAG_EXEC;
 		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
 		++sw_context->cur_val_buf;
 	}
@@ -289,7 +321,6 @@
 	return 0;
 }
 
-
 static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		       struct vmw_sw_context *sw_context,
 		       SVGA3dCmdHeader *header)
@@ -302,6 +333,7 @@
 		SVGA3dCmdSurfaceDMA dma;
 	} *cmd;
 	int ret;
+	struct vmw_resource *res;
 
 	cmd = container_of(header, struct vmw_dma_cmd, header);
 	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
@@ -318,17 +350,16 @@
 		goto out_no_reloc;
 	}
 
-	/**
+	/*
 	 * Patch command stream with device SID.
 	 */
-
 	cmd->dma.host.sid = srf->res.id;
 	vmw_kms_cursor_snoop(srf, sw_context->tfile, bo, header);
-	/**
-	 * FIXME: May deadlock here when called from the
-	 * command parsing code.
-	 */
-	vmw_surface_unreference(&srf);
+
+	vmw_dmabuf_unreference(&vmw_bo);
+
+	res = &srf->res;
+	return vmw_resource_to_validate_list(sw_context, &res);
 
 out_no_reloc:
 	vmw_dmabuf_unreference(&vmw_bo);
@@ -500,8 +531,9 @@
 
 static int vmw_cmd_check_all(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
-			     void *buf, uint32_t size)
+			     uint32_t size)
 {
+	void *buf = sw_context->cmd_bounce;
 	int32_t cur_size = size;
 	int ret;
 
@@ -550,7 +582,11 @@
 static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 {
 	struct ttm_validate_buffer *entry, *next;
+	uint32_t i = sw_context->num_ref_resources;
 
+	/*
+	 * Drop references to DMA buffers held during command submission.
+	 */
 	list_for_each_entry_safe(entry, next, &sw_context->validate_nodes,
 				 head) {
 		list_del(&entry->head);
@@ -559,6 +595,14 @@
 		sw_context->cur_val_buf--;
 	}
 	BUG_ON(sw_context->cur_val_buf != 0);
+
+	/*
+	 * Drop references to resources held during command submission.
+	 */
+	while (i-- > 0) {
+		sw_context->resources[i]->on_validate_list = false;
+		vmw_resource_unreference(&sw_context->resources[i]);
+	}
 }
 
 static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
@@ -602,6 +646,79 @@
 	return 0;
 }
 
+static int vmw_resize_cmd_bounce(struct vmw_sw_context *sw_context,
+				 uint32_t size)
+{
+	if (likely(sw_context->cmd_bounce_size >= size))
+		return 0;
+
+	if (sw_context->cmd_bounce_size == 0)
+		sw_context->cmd_bounce_size = VMWGFX_CMD_BOUNCE_INIT_SIZE;
+
+	while (sw_context->cmd_bounce_size < size) {
+		sw_context->cmd_bounce_size =
+			PAGE_ALIGN(sw_context->cmd_bounce_size +
+				   (sw_context->cmd_bounce_size >> 1));
+	}
+
+	if (sw_context->cmd_bounce != NULL)
+		vfree(sw_context->cmd_bounce);
+
+	sw_context->cmd_bounce = vmalloc(sw_context->cmd_bounce_size);
+
+	if (sw_context->cmd_bounce == NULL) {
+		DRM_ERROR("Failed to allocate command bounce buffer.\n");
+		sw_context->cmd_bounce_size = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * vmw_execbuf_fence_commands - create and submit a command stream fence
+ *
+ * Creates a fence object and submits a command stream marker.
+ * If this fails for some reason, We sync the fifo and return NULL.
+ * It is then safe to fence buffers with a NULL pointer.
+ */
+
+int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+			       struct vmw_private *dev_priv,
+			       struct vmw_fence_obj **p_fence,
+			       uint32_t *p_handle)
+{
+	uint32_t sequence;
+	int ret;
+	bool synced = false;
+
+
+	ret = vmw_fifo_send_fence(dev_priv, &sequence);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Fence submission error. Syncing.\n");
+		synced = true;
+	}
+
+	if (p_handle != NULL)
+		ret = vmw_user_fence_create(file_priv, dev_priv->fman,
+					    sequence,
+					    DRM_VMW_FENCE_FLAG_EXEC,
+					    p_fence, p_handle);
+	else
+		ret = vmw_fence_create(dev_priv->fman, sequence,
+				       DRM_VMW_FENCE_FLAG_EXEC,
+				       p_fence);
+
+	if (unlikely(ret != 0 && !synced)) {
+		(void) vmw_fallback_wait(dev_priv, false, false,
+					 sequence, false,
+					 VMW_FENCE_WAIT_TIMEOUT);
+		*p_fence = NULL;
+	}
+
+	return 0;
+}
+
 int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv)
 {
@@ -612,9 +729,24 @@
 	int ret;
 	void *user_cmd;
 	void *cmd;
-	uint32_t sequence;
 	struct vmw_sw_context *sw_context = &dev_priv->ctx;
 	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct vmw_fence_obj *fence;
+	uint32_t handle;
+
+	/*
+	 * This will allow us to extend the ioctl argument while
+	 * maintaining backwards compatibility:
+	 * We take different code paths depending on the value of
+	 * arg->version.
+	 */
+
+	if (unlikely(arg->version != DRM_VMW_EXECBUF_VERSION)) {
+		DRM_ERROR("Incorrect execbuf version.\n");
+		DRM_ERROR("You're running outdated experimental "
+			  "vmwgfx user-space drivers.");
+		return -EINVAL;
+	}
 
 	ret = ttm_read_lock(&vmaster->lock, true);
 	if (unlikely(ret != 0))
@@ -626,20 +758,18 @@
 		goto out_no_cmd_mutex;
 	}
 
-	cmd = vmw_fifo_reserve(dev_priv, arg->command_size);
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving fifo space for commands.\n");
-		ret = -ENOMEM;
+	ret = vmw_resize_cmd_bounce(sw_context, arg->command_size);
+	if (unlikely(ret != 0))
 		goto out_unlock;
-	}
 
 	user_cmd = (void __user *)(unsigned long)arg->commands;
-	ret = copy_from_user(cmd, user_cmd, arg->command_size);
+	ret = copy_from_user(sw_context->cmd_bounce,
+			     user_cmd, arg->command_size);
 
 	if (unlikely(ret != 0)) {
 		ret = -EFAULT;
 		DRM_ERROR("Failed copying commands.\n");
-		goto out_commit;
+		goto out_unlock;
 	}
 
 	sw_context->tfile = vmw_fpriv(file_priv)->tfile;
@@ -647,12 +777,14 @@
 	sw_context->sid_valid = false;
 	sw_context->cur_reloc = 0;
 	sw_context->cur_val_buf = 0;
+	sw_context->num_ref_resources = 0;
 
 	INIT_LIST_HEAD(&sw_context->validate_nodes);
 
-	ret = vmw_cmd_check_all(dev_priv, sw_context, cmd, arg->command_size);
+	ret = vmw_cmd_check_all(dev_priv, sw_context, arg->command_size);
 	if (unlikely(ret != 0))
 		goto out_err;
+
 	ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes);
 	if (unlikely(ret != 0))
 		goto out_err;
@@ -664,53 +796,86 @@
 	vmw_apply_relocations(sw_context);
 
 	if (arg->throttle_us) {
-		ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.fence_queue,
+		ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.marker_queue,
 				   arg->throttle_us);
 
 		if (unlikely(ret != 0))
-			goto out_err;
+			goto out_throttle;
 	}
 
+	cmd = vmw_fifo_reserve(dev_priv, arg->command_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving fifo space for commands.\n");
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	memcpy(cmd, sw_context->cmd_bounce, arg->command_size);
 	vmw_fifo_commit(dev_priv, arg->command_size);
 
-	ret = vmw_fifo_send_fence(dev_priv, &sequence);
-
-	ttm_eu_fence_buffer_objects(&sw_context->validate_nodes,
-				    (void *)(unsigned long) sequence);
-	vmw_clear_validations(sw_context);
-	mutex_unlock(&dev_priv->cmdbuf_mutex);
-
+	user_fence_rep = (struct drm_vmw_fence_rep __user *)
+		(unsigned long)arg->fence_rep;
+	ret = vmw_execbuf_fence_commands(file_priv, dev_priv,
+					 &fence,
+					 (user_fence_rep) ? &handle : NULL);
 	/*
 	 * This error is harmless, because if fence submission fails,
-	 * vmw_fifo_send_fence will sync.
+	 * vmw_fifo_send_fence will sync. The error will be propagated to
+	 * user-space in @fence_rep
 	 */
 
 	if (ret != 0)
 		DRM_ERROR("Fence submission error. Syncing.\n");
 
-	fence_rep.error = ret;
-	fence_rep.fence_seq = (uint64_t) sequence;
-	fence_rep.pad64 = 0;
+	ttm_eu_fence_buffer_objects(&sw_context->validate_nodes,
+				    (void *) fence);
 
-	user_fence_rep = (struct drm_vmw_fence_rep __user *)
-	    (unsigned long)arg->fence_rep;
+	vmw_clear_validations(sw_context);
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
 
-	/*
-	 * copy_to_user errors will be detected by user space not
-	 * seeing fence_rep::error filled in.
-	 */
+	if (user_fence_rep) {
+		fence_rep.error = ret;
+		fence_rep.handle = handle;
+		fence_rep.seqno = fence->seqno;
+		vmw_update_seqno(dev_priv, &dev_priv->fifo);
+		fence_rep.passed_seqno = dev_priv->last_read_seqno;
 
-	ret = copy_to_user(user_fence_rep, &fence_rep, sizeof(fence_rep));
+		/*
+		 * copy_to_user errors will be detected by user space not
+		 * seeing fence_rep::error filled in. Typically
+		 * user-space would have pre-set that member to -EFAULT.
+		 */
+		ret = copy_to_user(user_fence_rep, &fence_rep,
+				   sizeof(fence_rep));
+
+		/*
+		 * User-space lost the fence object. We need to sync
+		 * and unreference the handle.
+		 */
+		if (unlikely(ret != 0) && (fence_rep.error == 0)) {
+			BUG_ON(fence == NULL);
+
+			ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
+						  handle, TTM_REF_USAGE);
+			DRM_ERROR("Fence copy error. Syncing.\n");
+			(void) vmw_fence_obj_wait(fence,
+						  fence->signal_mask,
+						  false, false,
+						  VMW_FENCE_WAIT_TIMEOUT);
+		}
+	}
+
+	if (likely(fence != NULL))
+		vmw_fence_obj_unreference(&fence);
 
 	vmw_kms_cursor_post_execbuf(dev_priv);
 	ttm_read_unlock(&vmaster->lock);
 	return 0;
 out_err:
 	vmw_free_relocations(sw_context);
+out_throttle:
 	ttm_eu_backoff_reservation(&sw_context->validate_nodes);
 	vmw_clear_validations(sw_context);
-out_commit:
-	vmw_fifo_commit(dev_priv, 0);
 out_unlock:
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
 out_no_cmd_mutex:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index bfab60c..b1888e8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -158,10 +158,14 @@
 {
 	struct vmw_fb_par *par = info->par;
 	struct vmw_private *vmw_priv = par->vmw_priv;
+	int ret;
 
-	vmw_kms_write_svga(vmw_priv, info->var.xres, info->var.yres,
-			   info->fix.line_length,
-			   par->bpp, par->depth);
+	ret = vmw_kms_write_svga(vmw_priv, info->var.xres, info->var.yres,
+				 info->fix.line_length,
+				 par->bpp, par->depth);
+	if (ret)
+		return ret;
+
 	if (vmw_priv->capabilities & SVGA_CAP_DISPLAY_TOPOLOGY) {
 		/* TODO check if pitch and offset changes */
 		vmw_write(vmw_priv, SVGA_REG_NUM_GUEST_DISPLAYS, 1);
@@ -405,14 +409,14 @@
 	struct fb_info *info;
 	unsigned initial_width, initial_height;
 	unsigned fb_width, fb_height;
-	unsigned fb_bbp, fb_depth, fb_offset, fb_pitch, fb_size;
+	unsigned fb_bpp, fb_depth, fb_offset, fb_pitch, fb_size;
 	int ret;
 
 	/* XXX These shouldn't be hardcoded. */
 	initial_width = 800;
 	initial_height = 600;
 
-	fb_bbp = 32;
+	fb_bpp = 32;
 	fb_depth = 24;
 
 	/* XXX As shouldn't these be as well. */
@@ -422,7 +426,7 @@
 	initial_width = min(fb_width, initial_width);
 	initial_height = min(fb_height, initial_height);
 
-	fb_pitch = fb_width * fb_bbp / 8;
+	fb_pitch = fb_width * fb_bpp / 8;
 	fb_size = fb_pitch * fb_height;
 	fb_offset = vmw_read(vmw_priv, SVGA_REG_FB_OFFSET);
 
@@ -437,7 +441,7 @@
 	par = info->par;
 	par->vmw_priv = vmw_priv;
 	par->depth = fb_depth;
-	par->bpp = fb_bbp;
+	par->bpp = fb_bpp;
 	par->vmalloc = NULL;
 	par->max_width = fb_width;
 	par->max_height = fb_height;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 61eacc1..5065a14 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright (C) 2010 VMware, Inc., Palo Alto, CA., USA
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,149 +25,595 @@
  *
  **************************************************************************/
 
-
+#include "drmP.h"
 #include "vmwgfx_drv.h"
 
-struct vmw_fence {
-	struct list_head head;
-	uint32_t sequence;
-	struct timespec submitted;
+#define VMW_FENCE_WRAP (1 << 31)
+
+struct vmw_fence_manager {
+	int num_fence_objects;
+	struct vmw_private *dev_priv;
+	spinlock_t lock;
+	u32 next_seqno;
+	struct list_head fence_list;
+	struct work_struct work;
+	u32 user_fence_size;
+	u32 fence_size;
+	bool fifo_down;
+	struct list_head cleanup_list;
 };
 
-void vmw_fence_queue_init(struct vmw_fence_queue *queue)
-{
-	INIT_LIST_HEAD(&queue->head);
-	queue->lag = ns_to_timespec(0);
-	getrawmonotonic(&queue->lag_time);
-	spin_lock_init(&queue->lock);
-}
+struct vmw_user_fence {
+	struct ttm_base_object base;
+	struct vmw_fence_obj fence;
+};
 
-void vmw_fence_queue_takedown(struct vmw_fence_queue *queue)
-{
-	struct vmw_fence *fence, *next;
+/**
+ * vmw_fence_destroy_locked
+ *
+ */
 
-	spin_lock(&queue->lock);
-	list_for_each_entry_safe(fence, next, &queue->head, head) {
+static void vmw_fence_obj_destroy_locked(struct kref *kref)
+{
+	struct vmw_fence_obj *fence =
+		container_of(kref, struct vmw_fence_obj, kref);
+
+	struct vmw_fence_manager *fman = fence->fman;
+	unsigned int num_fences;
+
+	list_del_init(&fence->head);
+	num_fences = --fman->num_fence_objects;
+	spin_unlock_irq(&fman->lock);
+	if (fence->destroy)
+		fence->destroy(fence);
+	else
 		kfree(fence);
-	}
-	spin_unlock(&queue->lock);
+
+	spin_lock_irq(&fman->lock);
 }
 
-int vmw_fence_push(struct vmw_fence_queue *queue,
-		   uint32_t sequence)
+
+/**
+ * Execute signal actions on fences recently signaled.
+ * This is done from a workqueue so we don't have to execute
+ * signal actions from atomic context.
+ */
+
+static void vmw_fence_work_func(struct work_struct *work)
 {
-	struct vmw_fence *fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	struct vmw_fence_manager *fman =
+		container_of(work, struct vmw_fence_manager, work);
+	struct list_head list;
+	struct vmw_fence_action *action, *next_action;
 
-	if (unlikely(!fence))
-		return -ENOMEM;
+	do {
+		INIT_LIST_HEAD(&list);
+		spin_lock_irq(&fman->lock);
+		list_splice_init(&fman->cleanup_list, &list);
+		spin_unlock_irq(&fman->lock);
 
-	fence->sequence = sequence;
-	getrawmonotonic(&fence->submitted);
-	spin_lock(&queue->lock);
-	list_add_tail(&fence->head, &queue->head);
-	spin_unlock(&queue->lock);
+		if (list_empty(&list))
+			return;
 
-	return 0;
+		/*
+		 * At this point, only we should be able to manipulate the
+		 * list heads of the actions we have on the private list.
+		 */
+
+		list_for_each_entry_safe(action, next_action, &list, head) {
+			list_del_init(&action->head);
+			action->cleanup(action);
+		}
+	} while (1);
 }
 
-int vmw_fence_pull(struct vmw_fence_queue *queue,
-		   uint32_t signaled_sequence)
+struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv)
 {
-	struct vmw_fence *fence, *next;
-	struct timespec now;
-	bool updated = false;
+	struct vmw_fence_manager *fman = kzalloc(sizeof(*fman), GFP_KERNEL);
 
-	spin_lock(&queue->lock);
-	getrawmonotonic(&now);
+	if (unlikely(fman == NULL))
+		return NULL;
 
-	if (list_empty(&queue->head)) {
-		queue->lag = ns_to_timespec(0);
-		queue->lag_time = now;
-		updated = true;
+	fman->dev_priv = dev_priv;
+	spin_lock_init(&fman->lock);
+	INIT_LIST_HEAD(&fman->fence_list);
+	INIT_LIST_HEAD(&fman->cleanup_list);
+	INIT_WORK(&fman->work, &vmw_fence_work_func);
+	fman->fifo_down = true;
+	fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence));
+	fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj));
+
+	return fman;
+}
+
+void vmw_fence_manager_takedown(struct vmw_fence_manager *fman)
+{
+	unsigned long irq_flags;
+	bool lists_empty;
+
+	(void) cancel_work_sync(&fman->work);
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	lists_empty = list_empty(&fman->fence_list) &&
+		list_empty(&fman->cleanup_list);
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	BUG_ON(!lists_empty);
+	kfree(fman);
+}
+
+static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
+			      struct vmw_fence_obj *fence,
+			      u32 seqno,
+			      uint32_t mask,
+			      void (*destroy) (struct vmw_fence_obj *fence))
+{
+	unsigned long irq_flags;
+	unsigned int num_fences;
+	int ret = 0;
+
+	fence->seqno = seqno;
+	INIT_LIST_HEAD(&fence->seq_passed_actions);
+	fence->fman = fman;
+	fence->signaled = 0;
+	fence->signal_mask = mask;
+	kref_init(&fence->kref);
+	fence->destroy = destroy;
+	init_waitqueue_head(&fence->queue);
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	if (unlikely(fman->fifo_down)) {
+		ret = -EBUSY;
 		goto out_unlock;
 	}
-
-	list_for_each_entry_safe(fence, next, &queue->head, head) {
-		if (signaled_sequence - fence->sequence > (1 << 30))
-			continue;
-
-		queue->lag = timespec_sub(now, fence->submitted);
-		queue->lag_time = now;
-		updated = true;
-		list_del(&fence->head);
-		kfree(fence);
-	}
+	list_add_tail(&fence->head, &fman->fence_list);
+	num_fences = ++fman->num_fence_objects;
 
 out_unlock:
-	spin_unlock(&queue->lock);
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+	return ret;
 
-	return (updated) ? 0 : -EBUSY;
 }
 
-static struct timespec vmw_timespec_add(struct timespec t1,
-					struct timespec t2)
+struct vmw_fence_obj *vmw_fence_obj_reference(struct vmw_fence_obj *fence)
 {
-	t1.tv_sec += t2.tv_sec;
-	t1.tv_nsec += t2.tv_nsec;
-	if (t1.tv_nsec >= 1000000000L) {
-		t1.tv_sec += 1;
-		t1.tv_nsec -= 1000000000L;
+	kref_get(&fence->kref);
+	return fence;
+}
+
+/**
+ * vmw_fence_obj_unreference
+ *
+ * Note that this function may not be entered with disabled irqs since
+ * it may re-enable them in the destroy function.
+ *
+ */
+void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
+{
+	struct vmw_fence_obj *fence = *fence_p;
+	struct vmw_fence_manager *fman = fence->fman;
+
+	*fence_p = NULL;
+	spin_lock_irq(&fman->lock);
+	BUG_ON(atomic_read(&fence->kref.refcount) == 0);
+	kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
+	spin_unlock_irq(&fman->lock);
+}
+
+void vmw_fences_perform_actions(struct vmw_fence_manager *fman,
+				struct list_head *list)
+{
+	struct vmw_fence_action *action, *next_action;
+
+	list_for_each_entry_safe(action, next_action, list, head) {
+		list_del_init(&action->head);
+		if (action->seq_passed != NULL)
+			action->seq_passed(action);
+
+		/*
+		 * Add the cleanup action to the cleanup list so that
+		 * it will be performed by a worker task.
+		 */
+
+		if (action->cleanup != NULL)
+			list_add_tail(&action->head, &fman->cleanup_list);
+	}
+}
+
+void vmw_fences_update(struct vmw_fence_manager *fman, u32 seqno)
+{
+	unsigned long flags;
+	struct vmw_fence_obj *fence, *next_fence;
+	struct list_head action_list;
+
+	spin_lock_irqsave(&fman->lock, flags);
+	list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
+		if (seqno - fence->seqno < VMW_FENCE_WRAP) {
+			list_del_init(&fence->head);
+			fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+			INIT_LIST_HEAD(&action_list);
+			list_splice_init(&fence->seq_passed_actions,
+					 &action_list);
+			vmw_fences_perform_actions(fman, &action_list);
+			wake_up_all(&fence->queue);
+		}
+
+	}
+	if (!list_empty(&fman->cleanup_list))
+		(void) schedule_work(&fman->work);
+	spin_unlock_irqrestore(&fman->lock, flags);
+}
+
+
+bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
+			    uint32_t flags)
+{
+	struct vmw_fence_manager *fman = fence->fman;
+	unsigned long irq_flags;
+	uint32_t signaled;
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	signaled = fence->signaled;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	flags &= fence->signal_mask;
+	if ((signaled & flags) == flags)
+		return 1;
+
+	if ((signaled & DRM_VMW_FENCE_FLAG_EXEC) == 0) {
+		struct vmw_private *dev_priv = fman->dev_priv;
+		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+		u32 seqno;
+
+		seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+		vmw_fences_update(fman, seqno);
 	}
 
-	return t1;
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	signaled = fence->signaled;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	return ((signaled & flags) == flags);
 }
 
-static struct timespec vmw_fifo_lag(struct vmw_fence_queue *queue)
+int vmw_fence_obj_wait(struct vmw_fence_obj *fence,
+		       uint32_t flags, bool lazy,
+		       bool interruptible, unsigned long timeout)
 {
-	struct timespec now;
+	struct vmw_private *dev_priv = fence->fman->dev_priv;
+	long ret;
 
-	spin_lock(&queue->lock);
-	getrawmonotonic(&now);
-	queue->lag = vmw_timespec_add(queue->lag,
-				      timespec_sub(now, queue->lag_time));
-	queue->lag_time = now;
-	spin_unlock(&queue->lock);
-	return queue->lag;
+	if (likely(vmw_fence_obj_signaled(fence, flags)))
+		return 0;
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	vmw_seqno_waiter_add(dev_priv);
+
+	if (interruptible)
+		ret = wait_event_interruptible_timeout
+			(fence->queue,
+			 vmw_fence_obj_signaled(fence, flags),
+			 timeout);
+	else
+		ret = wait_event_timeout
+			(fence->queue,
+			 vmw_fence_obj_signaled(fence, flags),
+			 timeout);
+
+	vmw_seqno_waiter_remove(dev_priv);
+
+	if (unlikely(ret == 0))
+		ret = -EBUSY;
+	else if (likely(ret > 0))
+		ret = 0;
+
+	return ret;
 }
 
-
-static bool vmw_lag_lt(struct vmw_fence_queue *queue,
-		       uint32_t us)
+void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
 {
-	struct timespec lag, cond;
+	struct vmw_private *dev_priv = fence->fman->dev_priv;
 
-	cond = ns_to_timespec((s64) us * 1000);
-	lag = vmw_fifo_lag(queue);
-	return (timespec_compare(&lag, &cond) < 1);
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
 }
 
-int vmw_wait_lag(struct vmw_private *dev_priv,
-		 struct vmw_fence_queue *queue, uint32_t us)
+static void vmw_fence_destroy(struct vmw_fence_obj *fence)
 {
-	struct vmw_fence *fence;
-	uint32_t sequence;
+	struct vmw_fence_manager *fman = fence->fman;
+
+	kfree(fence);
+	/*
+	 * Free kernel space accounting.
+	 */
+	ttm_mem_global_free(vmw_mem_glob(fman->dev_priv),
+			    fman->fence_size);
+}
+
+int vmw_fence_create(struct vmw_fence_manager *fman,
+		     uint32_t seqno,
+		     uint32_t mask,
+		     struct vmw_fence_obj **p_fence)
+{
+	struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
+	struct vmw_fence_obj *fence;
 	int ret;
 
-	while (!vmw_lag_lt(queue, us)) {
-		spin_lock(&queue->lock);
-		if (list_empty(&queue->head))
-			sequence = atomic_read(&dev_priv->fence_seq);
-		else {
-			fence = list_first_entry(&queue->head,
-						 struct vmw_fence, head);
-			sequence = fence->sequence;
-		}
-		spin_unlock(&queue->lock);
+	ret = ttm_mem_global_alloc(mem_glob, fman->fence_size,
+				   false, false);
+	if (unlikely(ret != 0))
+		return ret;
 
-		ret = vmw_wait_fence(dev_priv, false, sequence, true,
-				     3*HZ);
-
-		if (unlikely(ret != 0))
-			return ret;
-
-		(void) vmw_fence_pull(queue, sequence);
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (unlikely(fence == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_object;
 	}
+
+	ret = vmw_fence_obj_init(fman, fence, seqno, mask,
+				 vmw_fence_destroy);
+	if (unlikely(ret != 0))
+		goto out_err_init;
+
+	*p_fence = fence;
+	return 0;
+
+out_err_init:
+	kfree(fence);
+out_no_object:
+	ttm_mem_global_free(mem_glob, fman->fence_size);
+	return ret;
+}
+
+
+static void vmw_user_fence_destroy(struct vmw_fence_obj *fence)
+{
+	struct vmw_user_fence *ufence =
+		container_of(fence, struct vmw_user_fence, fence);
+	struct vmw_fence_manager *fman = fence->fman;
+
+	kfree(ufence);
+	/*
+	 * Free kernel space accounting.
+	 */
+	ttm_mem_global_free(vmw_mem_glob(fman->dev_priv),
+			    fman->user_fence_size);
+}
+
+static void vmw_user_fence_base_release(struct ttm_base_object **p_base)
+{
+	struct ttm_base_object *base = *p_base;
+	struct vmw_user_fence *ufence =
+		container_of(base, struct vmw_user_fence, base);
+	struct vmw_fence_obj *fence = &ufence->fence;
+
+	*p_base = NULL;
+	vmw_fence_obj_unreference(&fence);
+}
+
+int vmw_user_fence_create(struct drm_file *file_priv,
+			  struct vmw_fence_manager *fman,
+			  uint32_t seqno,
+			  uint32_t mask,
+			  struct vmw_fence_obj **p_fence,
+			  uint32_t *p_handle)
+{
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_user_fence *ufence;
+	struct vmw_fence_obj *tmp;
+	struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
+	int ret;
+
+	/*
+	 * Kernel memory space accounting, since this object may
+	 * be created by a user-space request.
+	 */
+
+	ret = ttm_mem_global_alloc(mem_glob, fman->user_fence_size,
+				   false, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ufence = kzalloc(sizeof(*ufence), GFP_KERNEL);
+	if (unlikely(ufence == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_object;
+	}
+
+	ret = vmw_fence_obj_init(fman, &ufence->fence, seqno,
+				 mask, vmw_user_fence_destroy);
+	if (unlikely(ret != 0)) {
+		kfree(ufence);
+		goto out_no_object;
+	}
+
+	/*
+	 * The base object holds a reference which is freed in
+	 * vmw_user_fence_base_release.
+	 */
+	tmp = vmw_fence_obj_reference(&ufence->fence);
+	ret = ttm_base_object_init(tfile, &ufence->base, false,
+				   VMW_RES_FENCE,
+				   &vmw_user_fence_base_release, NULL);
+
+
+	if (unlikely(ret != 0)) {
+		/*
+		 * Free the base object's reference
+		 */
+		vmw_fence_obj_unreference(&tmp);
+		goto out_err;
+	}
+
+	*p_fence = &ufence->fence;
+	*p_handle = ufence->base.hash.key;
+
+	return 0;
+out_err:
+	tmp = &ufence->fence;
+	vmw_fence_obj_unreference(&tmp);
+out_no_object:
+	ttm_mem_global_free(mem_glob, fman->user_fence_size);
+	return ret;
+}
+
+
+/**
+ * vmw_fence_fifo_down - signal all unsignaled fence objects.
+ */
+
+void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
+{
+	unsigned long irq_flags;
+	struct list_head action_list;
+	int ret;
+
+	/*
+	 * The list may be altered while we traverse it, so always
+	 * restart when we've released the fman->lock.
+	 */
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	fman->fifo_down = true;
+	while (!list_empty(&fman->fence_list)) {
+		struct vmw_fence_obj *fence =
+			list_entry(fman->fence_list.prev, struct vmw_fence_obj,
+				   head);
+		kref_get(&fence->kref);
+		spin_unlock_irq(&fman->lock);
+
+		ret = vmw_fence_obj_wait(fence, fence->signal_mask,
+					 false, false,
+					 VMW_FENCE_WAIT_TIMEOUT);
+
+		if (unlikely(ret != 0)) {
+			list_del_init(&fence->head);
+			fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+			INIT_LIST_HEAD(&action_list);
+			list_splice_init(&fence->seq_passed_actions,
+					 &action_list);
+			vmw_fences_perform_actions(fman, &action_list);
+			wake_up_all(&fence->queue);
+		}
+
+		spin_lock_irq(&fman->lock);
+
+		BUG_ON(!list_empty(&fence->head));
+		kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
+	}
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+}
+
+void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	fman->fifo_down = false;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+}
+
+
+int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv)
+{
+	struct drm_vmw_fence_wait_arg *arg =
+	    (struct drm_vmw_fence_wait_arg *)data;
+	unsigned long timeout;
+	struct ttm_base_object *base;
+	struct vmw_fence_obj *fence;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	int ret;
+	uint64_t wait_timeout = ((uint64_t)arg->timeout_us * HZ);
+
+	/*
+	 * 64-bit division not present on 32-bit systems, so do an
+	 * approximation. (Divide by 1000000).
+	 */
+
+	wait_timeout = (wait_timeout >> 20) + (wait_timeout >> 24) -
+	  (wait_timeout >> 26);
+
+	if (!arg->cookie_valid) {
+		arg->cookie_valid = 1;
+		arg->kernel_cookie = jiffies + wait_timeout;
+	}
+
+	base = ttm_base_object_lookup(tfile, arg->handle);
+	if (unlikely(base == NULL)) {
+		printk(KERN_ERR "Wait invalid fence object handle "
+		       "0x%08lx.\n",
+		       (unsigned long)arg->handle);
+		return -EINVAL;
+	}
+
+	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
+
+	timeout = jiffies;
+	if (time_after_eq(timeout, (unsigned long)arg->kernel_cookie)) {
+		ret = ((vmw_fence_obj_signaled(fence, arg->flags)) ?
+		       0 : -EBUSY);
+		goto out;
+	}
+
+	timeout = (unsigned long)arg->kernel_cookie - timeout;
+
+	ret = vmw_fence_obj_wait(fence, arg->flags, arg->lazy, true, timeout);
+
+out:
+	ttm_base_object_unref(&base);
+
+	/*
+	 * Optionally unref the fence object.
+	 */
+
+	if (ret == 0 && (arg->wait_options & DRM_VMW_WAIT_OPTION_UNREF))
+		return ttm_ref_object_base_unref(tfile, arg->handle,
+						 TTM_REF_USAGE);
+	return ret;
+}
+
+int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv)
+{
+	struct drm_vmw_fence_signaled_arg *arg =
+		(struct drm_vmw_fence_signaled_arg *) data;
+	struct ttm_base_object *base;
+	struct vmw_fence_obj *fence;
+	struct vmw_fence_manager *fman;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+
+	base = ttm_base_object_lookup(tfile, arg->handle);
+	if (unlikely(base == NULL)) {
+		printk(KERN_ERR "Fence signaled invalid fence object handle "
+		       "0x%08lx.\n",
+		       (unsigned long)arg->handle);
+		return -EINVAL;
+	}
+
+	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
+	fman = fence->fman;
+
+	arg->signaled = vmw_fence_obj_signaled(fence, arg->flags);
+	spin_lock_irq(&fman->lock);
+
+	arg->signaled_flags = fence->signaled;
+	arg->passed_seqno = dev_priv->last_read_seqno;
+	spin_unlock_irq(&fman->lock);
+
+	ttm_base_object_unref(&base);
+
 	return 0;
 }
 
 
+int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv)
+{
+	struct drm_vmw_fence_arg *arg =
+		(struct drm_vmw_fence_arg *) data;
+
+	return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
+					 arg->handle,
+					 TTM_REF_USAGE);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
new file mode 100644
index 0000000..9307406
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -0,0 +1,105 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _VMWGFX_FENCE_H_
+
+#define VMW_FENCE_WAIT_TIMEOUT (5*HZ)
+
+struct vmw_private;
+
+struct vmw_fence_manager;
+
+/**
+ *
+ *
+ */
+struct vmw_fence_action {
+	struct list_head head;
+	void (*seq_passed) (struct vmw_fence_action *action);
+	void (*cleanup) (struct vmw_fence_action *action);
+};
+
+struct vmw_fence_obj {
+	struct kref kref;
+	u32 seqno;
+
+	struct vmw_fence_manager *fman;
+	struct list_head head;
+	uint32_t signaled;
+	uint32_t signal_mask;
+	struct list_head seq_passed_actions;
+	void (*destroy)(struct vmw_fence_obj *fence);
+	wait_queue_head_t queue;
+};
+
+extern struct vmw_fence_manager *
+vmw_fence_manager_init(struct vmw_private *dev_priv);
+
+extern void vmw_fence_manager_takedown(struct vmw_fence_manager *fman);
+
+extern void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p);
+
+extern struct vmw_fence_obj *
+vmw_fence_obj_reference(struct vmw_fence_obj *fence);
+
+extern void vmw_fences_update(struct vmw_fence_manager *fman,
+			      u32 sequence);
+
+extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
+				   uint32_t flags);
+
+extern int vmw_fence_obj_wait(struct vmw_fence_obj *fence, uint32_t flags,
+			      bool lazy,
+			      bool interruptible, unsigned long timeout);
+
+extern void vmw_fence_obj_flush(struct vmw_fence_obj *fence);
+
+extern int vmw_fence_create(struct vmw_fence_manager *fman,
+			    uint32_t seqno,
+			    uint32_t mask,
+			    struct vmw_fence_obj **p_fence);
+
+extern int vmw_user_fence_create(struct drm_file *file_priv,
+				 struct vmw_fence_manager *fman,
+				 uint32_t sequence,
+				 uint32_t mask,
+				 struct vmw_fence_obj **p_fence,
+				 uint32_t *p_handle);
+
+extern void vmw_fence_fifo_up(struct vmw_fence_manager *fman);
+
+extern void vmw_fence_fifo_down(struct vmw_fence_manager *fman);
+
+extern int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_priv);
+
+extern int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
+					struct drm_file *file_priv);
+
+extern int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file_priv);
+#endif /* _VMWGFX_FENCE_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 635c0ff..3ba9cac 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -72,22 +72,12 @@
 	uint32_t max;
 	uint32_t min;
 	uint32_t dummy;
-	int ret;
 
 	fifo->static_buffer_size = VMWGFX_FIFO_STATIC_SIZE;
 	fifo->static_buffer = vmalloc(fifo->static_buffer_size);
 	if (unlikely(fifo->static_buffer == NULL))
 		return -ENOMEM;
 
-	fifo->last_buffer_size = VMWGFX_FIFO_STATIC_SIZE;
-	fifo->last_data_size = 0;
-	fifo->last_buffer_add = false;
-	fifo->last_buffer = vmalloc(fifo->last_buffer_size);
-	if (unlikely(fifo->last_buffer == NULL)) {
-		ret = -ENOMEM;
-		goto out_err;
-	}
-
 	fifo->dynamic_buffer = NULL;
 	fifo->reserved_size = 0;
 	fifo->using_bounce_buffer = false;
@@ -137,14 +127,10 @@
 		 (unsigned int) min,
 		 (unsigned int) fifo->capabilities);
 
-	atomic_set(&dev_priv->fence_seq, dev_priv->last_read_sequence);
-	iowrite32(dev_priv->last_read_sequence, fifo_mem + SVGA_FIFO_FENCE);
-	vmw_fence_queue_init(&fifo->fence_queue);
+	atomic_set(&dev_priv->marker_seq, dev_priv->last_read_seqno);
+	iowrite32(dev_priv->last_read_seqno, fifo_mem + SVGA_FIFO_FENCE);
+	vmw_marker_queue_init(&fifo->marker_queue);
 	return vmw_fifo_send_fence(dev_priv, &dummy);
-out_err:
-	vfree(fifo->static_buffer);
-	fifo->static_buffer = NULL;
-	return ret;
 }
 
 void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
@@ -170,7 +156,7 @@
 	while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0)
 		vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
 
-	dev_priv->last_read_sequence = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+	dev_priv->last_read_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
 
 	vmw_write(dev_priv, SVGA_REG_CONFIG_DONE,
 		  dev_priv->config_done_state);
@@ -180,12 +166,7 @@
 		  dev_priv->traces_state);
 
 	mutex_unlock(&dev_priv->hw_mutex);
-	vmw_fence_queue_takedown(&fifo->fence_queue);
-
-	if (likely(fifo->last_buffer != NULL)) {
-		vfree(fifo->last_buffer);
-		fifo->last_buffer = NULL;
-	}
+	vmw_marker_queue_takedown(&fifo->marker_queue);
 
 	if (likely(fifo->static_buffer != NULL)) {
 		vfree(fifo->static_buffer);
@@ -466,7 +447,7 @@
 	mutex_unlock(&fifo_state->fifo_mutex);
 }
 
-int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
+int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *seqno)
 {
 	struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
 	struct svga_fifo_cmd_fence *cmd_fence;
@@ -476,16 +457,16 @@
 
 	fm = vmw_fifo_reserve(dev_priv, bytes);
 	if (unlikely(fm == NULL)) {
-		*sequence = atomic_read(&dev_priv->fence_seq);
+		*seqno = atomic_read(&dev_priv->marker_seq);
 		ret = -ENOMEM;
-		(void)vmw_fallback_wait(dev_priv, false, true, *sequence,
+		(void)vmw_fallback_wait(dev_priv, false, true, *seqno,
 					false, 3*HZ);
 		goto out_err;
 	}
 
 	do {
-		*sequence = atomic_add_return(1, &dev_priv->fence_seq);
-	} while (*sequence == 0);
+		*seqno = atomic_add_return(1, &dev_priv->marker_seq);
+	} while (*seqno == 0);
 
 	if (!(fifo_state->capabilities & SVGA_FIFO_CAP_FENCE)) {
 
@@ -502,61 +483,11 @@
 	cmd_fence = (struct svga_fifo_cmd_fence *)
 	    ((unsigned long)fm + sizeof(__le32));
 
-	iowrite32(*sequence, &cmd_fence->fence);
-	fifo_state->last_buffer_add = true;
+	iowrite32(*seqno, &cmd_fence->fence);
 	vmw_fifo_commit(dev_priv, bytes);
-	fifo_state->last_buffer_add = false;
-	(void) vmw_fence_push(&fifo_state->fence_queue, *sequence);
-	vmw_update_sequence(dev_priv, fifo_state);
+	(void) vmw_marker_push(&fifo_state->marker_queue, *seqno);
+	vmw_update_seqno(dev_priv, fifo_state);
 
 out_err:
 	return ret;
 }
-
-/**
- * Map the first page of the FIFO read-only to user-space.
- */
-
-static int vmw_fifo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	int ret;
-	unsigned long address = (unsigned long)vmf->virtual_address;
-
-	if (address != vma->vm_start)
-		return VM_FAULT_SIGBUS;
-
-	ret = vm_insert_pfn(vma, address, vma->vm_pgoff);
-	if (likely(ret == -EBUSY || ret == 0))
-		return VM_FAULT_NOPAGE;
-	else if (ret == -ENOMEM)
-		return VM_FAULT_OOM;
-
-	return VM_FAULT_SIGBUS;
-}
-
-static struct vm_operations_struct vmw_fifo_vm_ops = {
-	.fault = vmw_fifo_vm_fault,
-	.open = NULL,
-	.close = NULL
-};
-
-int vmw_fifo_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *file_priv;
-	struct vmw_private *dev_priv;
-
-	file_priv = filp->private_data;
-	dev_priv = vmw_priv(file_priv->minor->dev);
-
-	if (vma->vm_pgoff != (dev_priv->mmio_start >> PAGE_SHIFT) ||
-	    (vma->vm_end - vma->vm_start) != PAGE_SIZE)
-		return -EINVAL;
-
-	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE);
-	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_SHARED;
-	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-	vma->vm_page_prot = ttm_io_prot(TTM_PL_FLAG_UNCACHED,
-					vma->vm_page_prot);
-	vma->vm_ops = &vmw_fifo_vm_ops;
-	return 0;
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
index de0c594..f4e7763 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA
+ * Copyright © 2009-2011 VMware, Inc., Palo Alto, CA., USA
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -29,6 +29,77 @@
 #include "drmP.h"
 #include "ttm/ttm_bo_driver.h"
 
+#define VMW_PPN_SIZE sizeof(unsigned long)
+
+static int vmw_gmr2_bind(struct vmw_private *dev_priv,
+			 struct page *pages[],
+			 unsigned long num_pages,
+			 int gmr_id)
+{
+	SVGAFifoCmdDefineGMR2 define_cmd;
+	SVGAFifoCmdRemapGMR2 remap_cmd;
+	uint32_t define_size = sizeof(define_cmd) + 4;
+	uint32_t remap_size = VMW_PPN_SIZE * num_pages + sizeof(remap_cmd) + 4;
+	uint32_t *cmd;
+	uint32_t *cmd_orig;
+	uint32_t i;
+
+	cmd_orig = cmd = vmw_fifo_reserve(dev_priv, define_size + remap_size);
+	if (unlikely(cmd == NULL))
+		return -ENOMEM;
+
+	define_cmd.gmrId = gmr_id;
+	define_cmd.numPages = num_pages;
+
+	remap_cmd.gmrId = gmr_id;
+	remap_cmd.flags = (VMW_PPN_SIZE > sizeof(*cmd)) ?
+		SVGA_REMAP_GMR2_PPN64 : SVGA_REMAP_GMR2_PPN32;
+	remap_cmd.offsetPages = 0;
+	remap_cmd.numPages = num_pages;
+
+	*cmd++ = SVGA_CMD_DEFINE_GMR2;
+	memcpy(cmd, &define_cmd, sizeof(define_cmd));
+	cmd += sizeof(define_cmd) / sizeof(uint32);
+
+	*cmd++ = SVGA_CMD_REMAP_GMR2;
+	memcpy(cmd, &remap_cmd, sizeof(remap_cmd));
+	cmd += sizeof(remap_cmd) / sizeof(uint32);
+
+	for (i = 0; i < num_pages; ++i) {
+		if (VMW_PPN_SIZE > 4)
+			*cmd = page_to_pfn(*pages++);
+		else
+			*((uint64_t *)cmd) = page_to_pfn(*pages++);
+
+		cmd += VMW_PPN_SIZE / sizeof(*cmd);
+	}
+
+	vmw_fifo_commit(dev_priv, define_size + remap_size);
+
+	return 0;
+}
+
+static void vmw_gmr2_unbind(struct vmw_private *dev_priv,
+			    int gmr_id)
+{
+	SVGAFifoCmdDefineGMR2 define_cmd;
+	uint32_t define_size = sizeof(define_cmd) + 4;
+	uint32_t *cmd;
+
+	cmd = vmw_fifo_reserve(dev_priv, define_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("GMR2 unbind failed.\n");
+		return;
+	}
+	define_cmd.gmrId = gmr_id;
+	define_cmd.numPages = 0;
+
+	*cmd++ = SVGA_CMD_DEFINE_GMR2;
+	memcpy(cmd, &define_cmd, sizeof(define_cmd));
+
+	vmw_fifo_commit(dev_priv, define_size);
+}
+
 /**
  * FIXME: Adjust to the ttm lowmem / highmem storage to minimize
  * the number of used descriptors.
@@ -170,6 +241,9 @@
 	struct list_head desc_pages;
 	int ret;
 
+	if (likely(dev_priv->capabilities & SVGA_CAP_GMR2))
+		return vmw_gmr2_bind(dev_priv, pages, num_pages, gmr_id);
+
 	if (unlikely(!(dev_priv->capabilities & SVGA_CAP_GMR)))
 		return -EINVAL;
 
@@ -192,6 +266,11 @@
 
 void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id)
 {
+	if (likely(dev_priv->capabilities & SVGA_CAP_GMR2)) {
+		vmw_gmr2_unbind(dev_priv, gmr_id);
+		return;
+	}
+
 	mutex_lock(&dev_priv->hw_mutex);
 	vmw_write(dev_priv, SVGA_REG_GMR_ID, gmr_id);
 	wmb();
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
index ac6e0d1..5f71715 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
@@ -40,6 +40,8 @@
 	spinlock_t lock;
 	struct ida gmr_ida;
 	uint32_t max_gmr_ids;
+	uint32_t max_gmr_pages;
+	uint32_t used_gmr_pages;
 };
 
 static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man,
@@ -49,33 +51,50 @@
 {
 	struct vmwgfx_gmrid_man *gman =
 		(struct vmwgfx_gmrid_man *)man->priv;
-	int ret;
+	int ret = 0;
 	int id;
 
 	mem->mm_node = NULL;
 
+	spin_lock(&gman->lock);
+
+	if (gman->max_gmr_pages > 0) {
+		gman->used_gmr_pages += bo->num_pages;
+		if (unlikely(gman->used_gmr_pages > gman->max_gmr_pages))
+			goto out_err_locked;
+	}
+
 	do {
-		if (unlikely(ida_pre_get(&gman->gmr_ida, GFP_KERNEL) == 0))
-			return -ENOMEM;
-
+		spin_unlock(&gman->lock);
+		if (unlikely(ida_pre_get(&gman->gmr_ida, GFP_KERNEL) == 0)) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
 		spin_lock(&gman->lock);
-		ret = ida_get_new(&gman->gmr_ida, &id);
 
+		ret = ida_get_new(&gman->gmr_ida, &id);
 		if (unlikely(ret == 0 && id >= gman->max_gmr_ids)) {
 			ida_remove(&gman->gmr_ida, id);
-			spin_unlock(&gman->lock);
-			return 0;
+			ret = 0;
+			goto out_err_locked;
 		}
-
-		spin_unlock(&gman->lock);
-
 	} while (ret == -EAGAIN);
 
 	if (likely(ret == 0)) {
 		mem->mm_node = gman;
 		mem->start = id;
-	}
+		mem->num_pages = bo->num_pages;
+	} else
+		goto out_err_locked;
 
+	spin_unlock(&gman->lock);
+	return 0;
+
+out_err:
+	spin_lock(&gman->lock);
+out_err_locked:
+	gman->used_gmr_pages -= bo->num_pages;
+	spin_unlock(&gman->lock);
 	return ret;
 }
 
@@ -88,6 +107,7 @@
 	if (mem->mm_node) {
 		spin_lock(&gman->lock);
 		ida_remove(&gman->gmr_ida, mem->start);
+		gman->used_gmr_pages -= mem->num_pages;
 		spin_unlock(&gman->lock);
 		mem->mm_node = NULL;
 	}
@@ -96,6 +116,8 @@
 static int vmw_gmrid_man_init(struct ttm_mem_type_manager *man,
 			      unsigned long p_size)
 {
+	struct vmw_private *dev_priv =
+		container_of(man->bdev, struct vmw_private, bdev);
 	struct vmwgfx_gmrid_man *gman =
 		kzalloc(sizeof(*gman), GFP_KERNEL);
 
@@ -103,6 +125,8 @@
 		return -ENOMEM;
 
 	spin_lock_init(&gman->lock);
+	gman->max_gmr_pages = dev_priv->max_gmr_pages;
+	gman->used_gmr_pages = 0;
 	ida_init(&gman->gmr_ida);
 	gman->max_gmr_ids = p_size;
 	man->priv = (void *) gman;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 570d577..5ecf966 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -45,9 +45,6 @@
 	case DRM_VMW_PARAM_3D:
 		param->value = vmw_fifo_have_3d(dev_priv) ? 1 : 0;
 		break;
-	case DRM_VMW_PARAM_FIFO_OFFSET:
-		param->value = dev_priv->mmio_start;
-		break;
 	case DRM_VMW_PARAM_HW_CAPS:
 		param->value = dev_priv->capabilities;
 		break;
@@ -57,6 +54,13 @@
 	case DRM_VMW_PARAM_MAX_FB_SIZE:
 		param->value = dev_priv->vram_size;
 		break;
+	case DRM_VMW_PARAM_FIFO_HW_VERSION:
+	{
+		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+
+		param->value = ioread32(fifo_mem + SVGA_FIFO_3D_HWVERSION);
+		break;
+	}
 	default:
 		DRM_ERROR("Illegal vmwgfx get param request: %d\n",
 			  param->param);
@@ -66,25 +70,43 @@
 	return 0;
 }
 
-int vmw_fifo_debug_ioctl(struct drm_device *dev, void *data,
+
+int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv)
 {
+	struct drm_vmw_get_3d_cap_arg *arg =
+		(struct drm_vmw_get_3d_cap_arg *) data;
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
-	struct drm_vmw_fifo_debug_arg *arg =
-	    (struct drm_vmw_fifo_debug_arg *)data;
-	__le32 __user *buffer = (__le32 __user *)
-	    (unsigned long)arg->debug_buffer;
+	uint32_t size;
+	__le32 __iomem *fifo_mem;
+	void __user *buffer = (void __user *)((unsigned long)(arg->buffer));
+	void *bounce;
+	int ret;
 
-	if (unlikely(fifo_state->last_buffer == NULL))
+	if (unlikely(arg->pad64 != 0)) {
+		DRM_ERROR("Illegal GET_3D_CAP argument.\n");
 		return -EINVAL;
-
-	if (arg->debug_buffer_size < fifo_state->last_data_size) {
-		arg->used_size = arg->debug_buffer_size;
-		arg->did_not_fit = 1;
-	} else {
-		arg->used_size = fifo_state->last_data_size;
-		arg->did_not_fit = 0;
 	}
-	return copy_to_user(buffer, fifo_state->last_buffer, arg->used_size);
+
+	size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1) << 2;
+
+	if (arg->max_size < size)
+		size = arg->max_size;
+
+	bounce = vmalloc(size);
+	if (unlikely(bounce == NULL)) {
+		DRM_ERROR("Failed to allocate bounce buffer for 3D caps.\n");
+		return -ENOMEM;
+	}
+
+	fifo_mem = dev_priv->mmio_virt;
+	memcpy_fromio(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size);
+
+	ret = copy_to_user(buffer, bounce, size);
+	vfree(bounce);
+
+	if (unlikely(ret != 0))
+		DRM_ERROR("Failed to report 3D caps info.\n");
+
+	return ret;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index e92298a..a005292 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -40,8 +40,13 @@
 	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
 	spin_unlock(&dev_priv->irq_lock);
 
-	if (status & SVGA_IRQFLAG_ANY_FENCE)
+	if (status & SVGA_IRQFLAG_ANY_FENCE) {
+		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+		uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+
+		vmw_fences_update(dev_priv->fman, seqno);
 		wake_up_all(&dev_priv->fence_queue);
+	}
 	if (status & SVGA_IRQFLAG_FIFO_PROGRESS)
 		wake_up_all(&dev_priv->fifo_queue);
 
@@ -53,7 +58,7 @@
 	return IRQ_NONE;
 }
 
-static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t sequence)
+static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t seqno)
 {
 	uint32_t busy;
 
@@ -64,43 +69,43 @@
 	return (busy == 0);
 }
 
-void vmw_update_sequence(struct vmw_private *dev_priv,
+void vmw_update_seqno(struct vmw_private *dev_priv,
 			 struct vmw_fifo_state *fifo_state)
 {
 	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
 
-	uint32_t sequence = ioread32(fifo_mem + SVGA_FIFO_FENCE);
-
-	if (dev_priv->last_read_sequence != sequence) {
-		dev_priv->last_read_sequence = sequence;
-		vmw_fence_pull(&fifo_state->fence_queue, sequence);
+	if (dev_priv->last_read_seqno != seqno) {
+		dev_priv->last_read_seqno = seqno;
+		vmw_marker_pull(&fifo_state->marker_queue, seqno);
+		vmw_fences_update(dev_priv->fman, seqno);
 	}
 }
 
-bool vmw_fence_signaled(struct vmw_private *dev_priv,
-			uint32_t sequence)
+bool vmw_seqno_passed(struct vmw_private *dev_priv,
+			 uint32_t seqno)
 {
 	struct vmw_fifo_state *fifo_state;
 	bool ret;
 
-	if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP))
+	if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
 		return true;
 
 	fifo_state = &dev_priv->fifo;
-	vmw_update_sequence(dev_priv, fifo_state);
-	if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP))
+	vmw_update_seqno(dev_priv, fifo_state);
+	if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
 		return true;
 
 	if (!(fifo_state->capabilities & SVGA_FIFO_CAP_FENCE) &&
-	    vmw_fifo_idle(dev_priv, sequence))
+	    vmw_fifo_idle(dev_priv, seqno))
 		return true;
 
 	/**
-	 * Then check if the sequence is higher than what we've actually
+	 * Then check if the seqno is higher than what we've actually
 	 * emitted. Then the fence is stale and signaled.
 	 */
 
-	ret = ((atomic_read(&dev_priv->fence_seq) - sequence)
+	ret = ((atomic_read(&dev_priv->marker_seq) - seqno)
 	       > VMW_FENCE_WRAP);
 
 	return ret;
@@ -109,7 +114,7 @@
 int vmw_fallback_wait(struct vmw_private *dev_priv,
 		      bool lazy,
 		      bool fifo_idle,
-		      uint32_t sequence,
+		      uint32_t seqno,
 		      bool interruptible,
 		      unsigned long timeout)
 {
@@ -123,7 +128,7 @@
 	DEFINE_WAIT(__wait);
 
 	wait_condition = (fifo_idle) ? &vmw_fifo_idle :
-		&vmw_fence_signaled;
+		&vmw_seqno_passed;
 
 	/**
 	 * Block command submission while waiting for idle.
@@ -131,14 +136,14 @@
 
 	if (fifo_idle)
 		down_read(&fifo_state->rwsem);
-	signal_seq = atomic_read(&dev_priv->fence_seq);
+	signal_seq = atomic_read(&dev_priv->marker_seq);
 	ret = 0;
 
 	for (;;) {
 		prepare_to_wait(&dev_priv->fence_queue, &__wait,
 				(interruptible) ?
 				TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
-		if (wait_condition(dev_priv, sequence))
+		if (wait_condition(dev_priv, seqno))
 			break;
 		if (time_after_eq(jiffies, end_jiffies)) {
 			DRM_ERROR("SVGA device lockup.\n");
@@ -175,32 +180,12 @@
 	return ret;
 }
 
-int vmw_wait_fence(struct vmw_private *dev_priv,
-		   bool lazy, uint32_t sequence,
-		   bool interruptible, unsigned long timeout)
+void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
 {
-	long ret;
-	unsigned long irq_flags;
-	struct vmw_fifo_state *fifo = &dev_priv->fifo;
-
-	if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP))
-		return 0;
-
-	if (likely(vmw_fence_signaled(dev_priv, sequence)))
-		return 0;
-
-	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
-
-	if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE))
-		return vmw_fallback_wait(dev_priv, lazy, true, sequence,
-					 interruptible, timeout);
-
-	if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
-		return vmw_fallback_wait(dev_priv, lazy, false, sequence,
-					 interruptible, timeout);
-
 	mutex_lock(&dev_priv->hw_mutex);
-	if (atomic_add_return(1, &dev_priv->fence_queue_waiters) > 0) {
+	if (dev_priv->fence_queue_waiters++ == 0) {
+		unsigned long irq_flags;
+
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
 		outl(SVGA_IRQFLAG_ANY_FENCE,
 		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
@@ -210,25 +195,14 @@
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
+}
 
-	if (interruptible)
-		ret = wait_event_interruptible_timeout
-		    (dev_priv->fence_queue,
-		     vmw_fence_signaled(dev_priv, sequence),
-		     timeout);
-	else
-		ret = wait_event_timeout
-		    (dev_priv->fence_queue,
-		     vmw_fence_signaled(dev_priv, sequence),
-		     timeout);
-
-	if (unlikely(ret == 0))
-		ret = -EBUSY;
-	else if (likely(ret > 0))
-		ret = 0;
-
+void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
+{
 	mutex_lock(&dev_priv->hw_mutex);
-	if (atomic_dec_and_test(&dev_priv->fence_queue_waiters)) {
+	if (--dev_priv->fence_queue_waiters == 0) {
+		unsigned long irq_flags;
+
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
 		vmw_write(dev_priv, SVGA_REG_IRQMASK,
 			  vmw_read(dev_priv, SVGA_REG_IRQMASK) &
@@ -236,6 +210,50 @@
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
+}
+
+int vmw_wait_seqno(struct vmw_private *dev_priv,
+		      bool lazy, uint32_t seqno,
+		      bool interruptible, unsigned long timeout)
+{
+	long ret;
+	struct vmw_fifo_state *fifo = &dev_priv->fifo;
+
+	if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
+		return 0;
+
+	if (likely(vmw_seqno_passed(dev_priv, seqno)))
+		return 0;
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+
+	if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE))
+		return vmw_fallback_wait(dev_priv, lazy, true, seqno,
+					 interruptible, timeout);
+
+	if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
+		return vmw_fallback_wait(dev_priv, lazy, false, seqno,
+					 interruptible, timeout);
+
+	vmw_seqno_waiter_add(dev_priv);
+
+	if (interruptible)
+		ret = wait_event_interruptible_timeout
+		    (dev_priv->fence_queue,
+		     vmw_seqno_passed(dev_priv, seqno),
+		     timeout);
+	else
+		ret = wait_event_timeout
+		    (dev_priv->fence_queue,
+		     vmw_seqno_passed(dev_priv, seqno),
+		     timeout);
+
+	vmw_seqno_waiter_remove(dev_priv);
+
+	if (unlikely(ret == 0))
+		ret = -EBUSY;
+	else if (likely(ret > 0))
+		ret = 0;
 
 	return ret;
 }
@@ -273,25 +291,3 @@
 	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
 	outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
 }
-
-#define VMW_FENCE_WAIT_TIMEOUT 3*HZ;
-
-int vmw_fence_wait_ioctl(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv)
-{
-	struct drm_vmw_fence_wait_arg *arg =
-	    (struct drm_vmw_fence_wait_arg *)data;
-	unsigned long timeout;
-
-	if (!arg->cookie_valid) {
-		arg->cookie_valid = 1;
-		arg->kernel_cookie = jiffies + VMW_FENCE_WAIT_TIMEOUT;
-	}
-
-	timeout = jiffies;
-	if (time_after_eq(timeout, (unsigned long)arg->kernel_cookie))
-		return -EBUSY;
-
-	timeout = (unsigned long)arg->kernel_cookie - timeout;
-	return vmw_wait_fence(vmw_priv(dev), true, arg->sequence, true, timeout);
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index dfe32e6..1a4c84c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -567,6 +567,9 @@
 	case 15:
 		format = SVGA3D_A1R5G5B5;
 		break;
+	case 8:
+		format = SVGA3D_LUMINANCE8;
+		break;
 	default:
 		DRM_ERROR("Invalid color depth: %d\n", mode_cmd->depth);
 		return -EINVAL;
@@ -987,9 +990,9 @@
 	return ret;
 }
 
-void vmw_kms_write_svga(struct vmw_private *vmw_priv,
+int vmw_kms_write_svga(struct vmw_private *vmw_priv,
 			unsigned width, unsigned height, unsigned pitch,
-			unsigned bbp, unsigned depth)
+			unsigned bpp, unsigned depth)
 {
 	if (vmw_priv->capabilities & SVGA_CAP_PITCHLOCK)
 		vmw_write(vmw_priv, SVGA_REG_PITCHLOCK, pitch);
@@ -997,11 +1000,15 @@
 		iowrite32(pitch, vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK);
 	vmw_write(vmw_priv, SVGA_REG_WIDTH, width);
 	vmw_write(vmw_priv, SVGA_REG_HEIGHT, height);
-	vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, bbp);
-	vmw_write(vmw_priv, SVGA_REG_DEPTH, depth);
-	vmw_write(vmw_priv, SVGA_REG_RED_MASK, 0x00ff0000);
-	vmw_write(vmw_priv, SVGA_REG_GREEN_MASK, 0x0000ff00);
-	vmw_write(vmw_priv, SVGA_REG_BLUE_MASK, 0x000000ff);
+	vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, bpp);
+
+	if (vmw_read(vmw_priv, SVGA_REG_DEPTH) != depth) {
+		DRM_ERROR("Invalid depth %u for %u bpp, host expects %u\n",
+			  depth, bpp, vmw_read(vmw_priv, SVGA_REG_DEPTH));
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 int vmw_kms_save_vga(struct vmw_private *vmw_priv)
@@ -1011,12 +1018,7 @@
 
 	vmw_priv->vga_width = vmw_read(vmw_priv, SVGA_REG_WIDTH);
 	vmw_priv->vga_height = vmw_read(vmw_priv, SVGA_REG_HEIGHT);
-	vmw_priv->vga_depth = vmw_read(vmw_priv, SVGA_REG_DEPTH);
 	vmw_priv->vga_bpp = vmw_read(vmw_priv, SVGA_REG_BITS_PER_PIXEL);
-	vmw_priv->vga_pseudo = vmw_read(vmw_priv, SVGA_REG_PSEUDOCOLOR);
-	vmw_priv->vga_red_mask = vmw_read(vmw_priv, SVGA_REG_RED_MASK);
-	vmw_priv->vga_blue_mask = vmw_read(vmw_priv, SVGA_REG_BLUE_MASK);
-	vmw_priv->vga_green_mask = vmw_read(vmw_priv, SVGA_REG_GREEN_MASK);
 	if (vmw_priv->capabilities & SVGA_CAP_PITCHLOCK)
 		vmw_priv->vga_pitchlock =
 		  vmw_read(vmw_priv, SVGA_REG_PITCHLOCK);
@@ -1065,12 +1067,7 @@
 
 	vmw_write(vmw_priv, SVGA_REG_WIDTH, vmw_priv->vga_width);
 	vmw_write(vmw_priv, SVGA_REG_HEIGHT, vmw_priv->vga_height);
-	vmw_write(vmw_priv, SVGA_REG_DEPTH, vmw_priv->vga_depth);
 	vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, vmw_priv->vga_bpp);
-	vmw_write(vmw_priv, SVGA_REG_PSEUDOCOLOR, vmw_priv->vga_pseudo);
-	vmw_write(vmw_priv, SVGA_REG_RED_MASK, vmw_priv->vga_red_mask);
-	vmw_write(vmw_priv, SVGA_REG_GREEN_MASK, vmw_priv->vga_green_mask);
-	vmw_write(vmw_priv, SVGA_REG_BLUE_MASK, vmw_priv->vga_blue_mask);
 	if (vmw_priv->capabilities & SVGA_CAP_PITCHLOCK)
 		vmw_write(vmw_priv, SVGA_REG_PITCHLOCK,
 			  vmw_priv->vga_pitchlock);
@@ -1095,52 +1092,6 @@
 	return 0;
 }
 
-int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv)
-{
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct drm_vmw_update_layout_arg *arg =
-		(struct drm_vmw_update_layout_arg *)data;
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	void __user *user_rects;
-	struct drm_vmw_rect *rects;
-	unsigned rects_size;
-	int ret;
-
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
-
-	if (!arg->num_outputs) {
-		struct drm_vmw_rect def_rect = {0, 0, 800, 600};
-		vmw_kms_ldu_update_layout(dev_priv, 1, &def_rect);
-		goto out_unlock;
-	}
-
-	rects_size = arg->num_outputs * sizeof(struct drm_vmw_rect);
-	rects = kzalloc(rects_size, GFP_KERNEL);
-	if (unlikely(!rects)) {
-		ret = -ENOMEM;
-		goto out_unlock;
-	}
-
-	user_rects = (void __user *)(unsigned long)arg->rects;
-	ret = copy_from_user(rects, user_rects, rects_size);
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("Failed to get rects.\n");
-		ret = -EFAULT;
-		goto out_free;
-	}
-
-	vmw_kms_ldu_update_layout(dev_priv, arg->num_outputs, rects);
-
-out_free:
-	kfree(rects);
-out_unlock:
-	ttm_read_unlock(&vmaster->lock);
-	return ret;
-}
-
 bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
 				uint32_t pitch,
 				uint32_t height)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index b3a2cd5..7e1901c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -83,6 +83,15 @@
 				   u16 *r, u16 *g, u16 *b,
 				   uint32_t start, uint32_t size)
 {
+	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
+	int i;
+
+	for (i = 0; i < size; i++) {
+		DRM_DEBUG("%d r/g/b = 0x%04x / 0x%04x / 0x%04x\n", i, r[i], g[i], b[i]);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 0, r[i] >> 8);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 1, g[i] >> 8);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 2, b[i] >> 8);
+	}
 }
 
 static void vmw_ldu_crtc_destroy(struct drm_crtc *crtc)
@@ -114,10 +123,8 @@
 			return 0;
 		fb = entry->base.crtc.fb;
 
-		vmw_kms_write_svga(dev_priv, w, h, fb->pitch,
-				   fb->bits_per_pixel, fb->depth);
-
-		return 0;
+		return vmw_kms_write_svga(dev_priv, w, h, fb->pitch,
+					  fb->bits_per_pixel, fb->depth);
 	}
 
 	if (!list_empty(&lds->active)) {
@@ -265,9 +272,7 @@
 
 		vmw_ldu_del_active(dev_priv, ldu);
 
-		vmw_ldu_commit_list(dev_priv);
-
-		return 0;
+		return vmw_ldu_commit_list(dev_priv);
 	}
 
 
@@ -292,9 +297,7 @@
 
 	vmw_ldu_add_active(dev_priv, ldu, vfb);
 
-	vmw_ldu_commit_list(dev_priv);
-
-	return 0;
+	return vmw_ldu_commit_list(dev_priv);
 }
 
 static struct drm_crtc_funcs vmw_legacy_crtc_funcs = {
@@ -340,9 +343,16 @@
 	vmw_ldu_connector_detect(struct drm_connector *connector,
 				 bool force)
 {
-	if (vmw_connector_to_ldu(connector)->pref_active)
-		return connector_status_connected;
-	return connector_status_disconnected;
+	uint32_t num_displays;
+	struct drm_device *dev = connector->dev;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+
+	mutex_lock(&dev_priv->hw_mutex);
+	num_displays = vmw_read(dev_priv, SVGA_REG_NUM_DISPLAYS);
+	mutex_unlock(&dev_priv->hw_mutex);
+
+	return ((vmw_connector_to_ldu(connector)->base.unit < num_displays) ?
+		connector_status_connected : connector_status_disconnected);
 }
 
 static const struct drm_display_mode vmw_ldu_connector_builtin[] = {
@@ -540,6 +550,8 @@
 
 	drm_crtc_init(dev, crtc, &vmw_legacy_crtc_funcs);
 
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+
 	drm_connector_attach_property(connector,
 				      dev->mode_config.dirty_info_property,
 				      1);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_marker.c b/drivers/gpu/drm/vmwgfx/vmwgfx_marker.c
new file mode 100644
index 0000000..8a8725c
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_marker.c
@@ -0,0 +1,171 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2010 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "vmwgfx_drv.h"
+
+struct vmw_marker {
+	struct list_head head;
+	uint32_t seqno;
+	struct timespec submitted;
+};
+
+void vmw_marker_queue_init(struct vmw_marker_queue *queue)
+{
+	INIT_LIST_HEAD(&queue->head);
+	queue->lag = ns_to_timespec(0);
+	getrawmonotonic(&queue->lag_time);
+	spin_lock_init(&queue->lock);
+}
+
+void vmw_marker_queue_takedown(struct vmw_marker_queue *queue)
+{
+	struct vmw_marker *marker, *next;
+
+	spin_lock(&queue->lock);
+	list_for_each_entry_safe(marker, next, &queue->head, head) {
+		kfree(marker);
+	}
+	spin_unlock(&queue->lock);
+}
+
+int vmw_marker_push(struct vmw_marker_queue *queue,
+		   uint32_t seqno)
+{
+	struct vmw_marker *marker = kmalloc(sizeof(*marker), GFP_KERNEL);
+
+	if (unlikely(!marker))
+		return -ENOMEM;
+
+	marker->seqno = seqno;
+	getrawmonotonic(&marker->submitted);
+	spin_lock(&queue->lock);
+	list_add_tail(&marker->head, &queue->head);
+	spin_unlock(&queue->lock);
+
+	return 0;
+}
+
+int vmw_marker_pull(struct vmw_marker_queue *queue,
+		   uint32_t signaled_seqno)
+{
+	struct vmw_marker *marker, *next;
+	struct timespec now;
+	bool updated = false;
+
+	spin_lock(&queue->lock);
+	getrawmonotonic(&now);
+
+	if (list_empty(&queue->head)) {
+		queue->lag = ns_to_timespec(0);
+		queue->lag_time = now;
+		updated = true;
+		goto out_unlock;
+	}
+
+	list_for_each_entry_safe(marker, next, &queue->head, head) {
+		if (signaled_seqno - marker->seqno > (1 << 30))
+			continue;
+
+		queue->lag = timespec_sub(now, marker->submitted);
+		queue->lag_time = now;
+		updated = true;
+		list_del(&marker->head);
+		kfree(marker);
+	}
+
+out_unlock:
+	spin_unlock(&queue->lock);
+
+	return (updated) ? 0 : -EBUSY;
+}
+
+static struct timespec vmw_timespec_add(struct timespec t1,
+					struct timespec t2)
+{
+	t1.tv_sec += t2.tv_sec;
+	t1.tv_nsec += t2.tv_nsec;
+	if (t1.tv_nsec >= 1000000000L) {
+		t1.tv_sec += 1;
+		t1.tv_nsec -= 1000000000L;
+	}
+
+	return t1;
+}
+
+static struct timespec vmw_fifo_lag(struct vmw_marker_queue *queue)
+{
+	struct timespec now;
+
+	spin_lock(&queue->lock);
+	getrawmonotonic(&now);
+	queue->lag = vmw_timespec_add(queue->lag,
+				      timespec_sub(now, queue->lag_time));
+	queue->lag_time = now;
+	spin_unlock(&queue->lock);
+	return queue->lag;
+}
+
+
+static bool vmw_lag_lt(struct vmw_marker_queue *queue,
+		       uint32_t us)
+{
+	struct timespec lag, cond;
+
+	cond = ns_to_timespec((s64) us * 1000);
+	lag = vmw_fifo_lag(queue);
+	return (timespec_compare(&lag, &cond) < 1);
+}
+
+int vmw_wait_lag(struct vmw_private *dev_priv,
+		 struct vmw_marker_queue *queue, uint32_t us)
+{
+	struct vmw_marker *marker;
+	uint32_t seqno;
+	int ret;
+
+	while (!vmw_lag_lt(queue, us)) {
+		spin_lock(&queue->lock);
+		if (list_empty(&queue->head))
+			seqno = atomic_read(&dev_priv->marker_seq);
+		else {
+			marker = list_first_entry(&queue->head,
+						 struct vmw_marker, head);
+			seqno = marker->seqno;
+		}
+		spin_unlock(&queue->lock);
+
+		ret = vmw_wait_seqno(dev_priv, false, seqno, true,
+					3*HZ);
+
+		if (unlikely(ret != 0))
+			return ret;
+
+		(void) vmw_marker_pull(queue, seqno);
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index bfe1bcc..c1b6ffd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -31,10 +31,6 @@
 #include "ttm/ttm_placement.h"
 #include "drmP.h"
 
-#define VMW_RES_CONTEXT ttm_driver_type0
-#define VMW_RES_SURFACE ttm_driver_type1
-#define VMW_RES_STREAM ttm_driver_type2
-
 struct vmw_user_context {
 	struct ttm_base_object base;
 	struct vmw_resource res;
@@ -211,7 +207,7 @@
 	cmd->body.cid = cpu_to_le32(res->id);
 
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-	vmw_3d_resource_dec(dev_priv);
+	vmw_3d_resource_dec(dev_priv, false);
 }
 
 static int vmw_context_init(struct vmw_private *dev_priv,
@@ -248,7 +244,7 @@
 	cmd->body.cid = cpu_to_le32(res->id);
 
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-	(void) vmw_3d_resource_inc(dev_priv);
+	(void) vmw_3d_resource_inc(dev_priv, false);
 	vmw_resource_activate(res, vmw_hw_context_destroy);
 	return 0;
 }
@@ -364,7 +360,8 @@
 
 int vmw_context_check(struct vmw_private *dev_priv,
 		      struct ttm_object_file *tfile,
-		      int id)
+		      int id,
+		      struct vmw_resource **p_res)
 {
 	struct vmw_resource *res;
 	int ret = 0;
@@ -376,6 +373,8 @@
 			container_of(res, struct vmw_user_context, res);
 		if (ctx->base.tfile != tfile && !ctx->base.shareable)
 			ret = -EPERM;
+		if (p_res)
+			*p_res = vmw_resource_reference(res);
 	} else
 		ret = -EINVAL;
 	read_unlock(&dev_priv->resource_lock);
@@ -408,7 +407,7 @@
 	cmd->body.sid = cpu_to_le32(res->id);
 
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-	vmw_3d_resource_dec(dev_priv);
+	vmw_3d_resource_dec(dev_priv, false);
 }
 
 void vmw_surface_res_free(struct vmw_resource *res)
@@ -476,7 +475,7 @@
 	}
 
 	vmw_fifo_commit(dev_priv, submit_size);
-	(void) vmw_3d_resource_inc(dev_priv);
+	(void) vmw_3d_resource_inc(dev_priv, false);
 	vmw_resource_activate(res, vmw_hw_surface_destroy);
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index 1e8eedd..d3c11f5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -34,9 +34,8 @@
 	struct vmw_private *dev_priv;
 
 	if (unlikely(vma->vm_pgoff < VMWGFX_FILE_PAGE_OFFSET)) {
-		if (vmw_fifo_mmap(filp, vma) == 0)
-			return 0;
-		return drm_mmap(filp, vma);
+		DRM_ERROR("Illegal attempt to mmap old fifo space.\n");
+		return -EINVAL;
 	}
 
 	file_priv = filp->private_data;
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 9b7c2bb..43538b6 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1624,6 +1624,9 @@
 	drm_gem_object_unreference_unlocked(obj);
 }
 
+void drm_gem_free_mmap_offset(struct drm_gem_object *obj);
+int drm_gem_create_mmap_offset(struct drm_gem_object *obj);
+
 struct drm_gem_object *drm_gem_object_lookup(struct drm_device *dev,
 					     struct drm_file *filp,
 					     u32 handle);
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 44335e5..8020798 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -466,6 +466,8 @@
 /* DACs should rarely do this without a lot of testing */
 #define DRM_CONNECTOR_POLL_DISCONNECT (1 << 2)
 
+#define MAX_ELD_BYTES	128
+
 /**
  * drm_connector - central DRM connector control structure
  * @crtc: CRTC this connector is currently connected to, NULL if none
@@ -523,6 +525,13 @@
 	uint32_t force_encoder_id;
 	struct drm_encoder *encoder; /* currently active encoder */
 
+	/* EDID bits */
+	uint8_t eld[MAX_ELD_BYTES];
+	bool dvi_dual;
+	int max_tmds_clock;	/* in MHz */
+	bool latency_present[2];
+	int video_latency[2];	/* [0]: progressive, [1]: interlaced */
+	int audio_latency[2];
 	int null_edid_counter; /* needed to workaround some HW bugs where we get all 0s */
 };
 
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index eacb415..74ce916 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -230,4 +230,13 @@
 
 #define EDID_PRODUCT_ID(e) ((e)->prod_code[0] | ((e)->prod_code[1] << 8))
 
+struct drm_encoder;
+struct drm_connector;
+struct drm_display_mode;
+void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid);
+int drm_av_sync_delay(struct drm_connector *connector,
+		      struct drm_display_mode *mode);
+struct drm_connector *drm_select_eld(struct drm_encoder *encoder,
+				     struct drm_display_mode *mode);
+
 #endif /* __DRM_EDID_H__ */
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index b65be60..939b854 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -509,6 +509,7 @@
 #define DRM_RADEON_GEM_SET_TILING	0x28
 #define DRM_RADEON_GEM_GET_TILING	0x29
 #define DRM_RADEON_GEM_BUSY		0x2a
+#define DRM_RADEON_GEM_WAIT		0x2b
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -550,6 +551,7 @@
 #define DRM_IOCTL_RADEON_GEM_SET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
 #define DRM_IOCTL_RADEON_GEM_GET_TILING	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
+#define DRM_IOCTL_RADEON_GEM_WAIT	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT, struct drm_radeon_gem_wait)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -846,6 +848,15 @@
 	uint32_t        domain;
 };
 
+#define RADEON_GEM_NO_WAIT	0x1
+#define RADEON_GEM_USAGE_READ	0x2
+#define RADEON_GEM_USAGE_WRITE	0x4
+
+struct drm_radeon_gem_wait {
+	uint32_t	handle;
+	uint32_t        flags;  /* one of RADEON_GEM_* */
+};
+
 struct drm_radeon_gem_pread {
 	/** Handle for the object being read. */
 	uint32_t handle;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 42e3469..da957bf 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -44,6 +44,11 @@
 
 struct drm_mm_node;
 
+enum ttm_buffer_usage {
+    TTM_USAGE_READ = 1,
+    TTM_USAGE_WRITE = 2,
+    TTM_USAGE_READWRITE = TTM_USAGE_READ | TTM_USAGE_WRITE
+};
 
 /**
  * struct ttm_placement
@@ -174,7 +179,10 @@
  * the bo_device::lru_lock.
  * @reserved: Deadlock-free lock used for synchronization state transitions.
  * @sync_obj_arg: Opaque argument to synchronization object function.
- * @sync_obj: Pointer to a synchronization object.
+ * @sync_obj: Pointer to a synchronization object of a last read or write,
+ * whichever is later.
+ * @sync_obj_read: Pointer to a synchronization object of a last read.
+ * @sync_obj_write: Pointer to a synchronization object of a last write.
  * @priv_flags: Flags describing buffer object internal state.
  * @vm_rb: Rb node for the vm rb tree.
  * @vm_node: Address space manager node.
@@ -258,6 +266,8 @@
 
 	void *sync_obj_arg;
 	void *sync_obj;
+	void *sync_obj_read;
+	void *sync_obj_write;
 	unsigned long priv_flags;
 
 	/**
@@ -325,6 +335,7 @@
  * @bo:  The buffer object.
  * @interruptible:  Use interruptible wait.
  * @no_wait:  Return immediately if buffer is busy.
+ * @usage:  Whether to wait for the last read and/or the last write.
  *
  * This function must be called with the bo::mutex held, and makes
  * sure any previous rendering to the buffer is completed.
@@ -334,7 +345,8 @@
  * Returns -ERESTARTSYS if interrupted by a signal.
  */
 extern int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy,
-		       bool interruptible, bool no_wait);
+		       bool interruptible, bool no_wait,
+		       enum ttm_buffer_usage usage);
 /**
  * ttm_bo_validate
  *
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index 26cc7f9..375f299 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -41,20 +41,26 @@
  * @bo:             refcounted buffer object pointer.
  * @new_sync_obj_arg: New sync_obj_arg for @bo, to be used once
  * adding a new sync object.
+ * @usage           Indicates how @bo is used by the device.
  * @reserved:       Indicates whether @bo has been reserved for validation.
  * @removed:        Indicates whether @bo has been removed from lru lists.
  * @put_count:      Number of outstanding references on bo::list_kref.
  * @old_sync_obj:   Pointer to a sync object about to be unreferenced
+ * @old_sync_obj_read: Pointer to a read sync object about to be unreferenced.
+ * @old_sync_obj_write: Pointer to a write sync object about to be unreferenced.
  */
 
 struct ttm_validate_buffer {
 	struct list_head head;
 	struct ttm_buffer_object *bo;
 	void *new_sync_obj_arg;
+	enum ttm_buffer_usage usage;
 	bool reserved;
 	bool removed;
 	int put_count;
 	void *old_sync_obj;
+	void *old_sync_obj_read;
+	void *old_sync_obj_write;
 };
 
 /**
diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h
index 5c36432..29cd9cf 100644
--- a/include/drm/vmwgfx_drm.h
+++ b/include/drm/vmwgfx_drm.h
@@ -31,7 +31,6 @@
 #define DRM_VMW_MAX_SURFACE_FACES 6
 #define DRM_VMW_MAX_MIP_LEVELS 24
 
-#define DRM_VMW_EXT_NAME_LEN 128
 
 #define DRM_VMW_GET_PARAM            0
 #define DRM_VMW_ALLOC_DMABUF         1
@@ -48,10 +47,11 @@
 #define DRM_VMW_UNREF_SURFACE        10
 #define DRM_VMW_REF_SURFACE          11
 #define DRM_VMW_EXECBUF              12
-#define DRM_VMW_FIFO_DEBUG           13
+#define DRM_VMW_GET_3D_CAP           13
 #define DRM_VMW_FENCE_WAIT           14
-/* guarded by minor version >= 2 */
-#define DRM_VMW_UPDATE_LAYOUT        15
+#define DRM_VMW_FENCE_SIGNALED       15
+#define DRM_VMW_FENCE_UNREF          16
+#define DRM_VMW_FENCE_EVENT          17
 
 
 /*************************************************************************/
@@ -69,10 +69,10 @@
 #define DRM_VMW_PARAM_NUM_STREAMS      0
 #define DRM_VMW_PARAM_NUM_FREE_STREAMS 1
 #define DRM_VMW_PARAM_3D               2
-#define DRM_VMW_PARAM_FIFO_OFFSET      3
-#define DRM_VMW_PARAM_HW_CAPS          4
-#define DRM_VMW_PARAM_FIFO_CAPS        5
-#define DRM_VMW_PARAM_MAX_FB_SIZE      6
+#define DRM_VMW_PARAM_HW_CAPS          3
+#define DRM_VMW_PARAM_FIFO_CAPS        4
+#define DRM_VMW_PARAM_MAX_FB_SIZE      5
+#define DRM_VMW_PARAM_FIFO_HW_VERSION  6
 
 /**
  * struct drm_vmw_getparam_arg
@@ -91,49 +91,6 @@
 
 /*************************************************************************/
 /**
- * DRM_VMW_EXTENSION - Query device extensions.
- */
-
-/**
- * struct drm_vmw_extension_rep
- *
- * @exists: The queried extension exists.
- * @driver_ioctl_offset: Ioctl number of the first ioctl in the extension.
- * @driver_sarea_offset: Offset to any space in the DRI SAREA
- * used by the extension.
- * @major: Major version number of the extension.
- * @minor: Minor version number of the extension.
- * @pl: Patch level version number of the extension.
- *
- * Output argument to the DRM_VMW_EXTENSION Ioctl.
- */
-
-struct drm_vmw_extension_rep {
-	int32_t exists;
-	uint32_t driver_ioctl_offset;
-	uint32_t driver_sarea_offset;
-	uint32_t major;
-	uint32_t minor;
-	uint32_t pl;
-	uint32_t pad64;
-};
-
-/**
- * union drm_vmw_extension_arg
- *
- * @extension - Ascii name of the extension to be queried. //In
- * @rep - Reply as defined above. //Out
- *
- * Argument to the DRM_VMW_EXTENSION Ioctl.
- */
-
-union drm_vmw_extension_arg {
-	char extension[DRM_VMW_EXT_NAME_LEN];
-	struct drm_vmw_extension_rep rep;
-};
-
-/*************************************************************************/
-/**
  * DRM_VMW_CREATE_CONTEXT - Create a host context.
  *
  * Allocates a device unique context id, and queues a create context command
@@ -292,7 +249,7 @@
  * DRM_VMW_EXECBUF
  *
  * Submit a command buffer for execution on the host, and return a
- * fence sequence that when signaled, indicates that the command buffer has
+ * fence seqno that when signaled, indicates that the command buffer has
  * executed.
  */
 
@@ -314,21 +271,30 @@
  * Argument to the DRM_VMW_EXECBUF Ioctl.
  */
 
-#define DRM_VMW_EXECBUF_VERSION 0
+#define DRM_VMW_EXECBUF_VERSION 1
 
 struct drm_vmw_execbuf_arg {
 	uint64_t commands;
 	uint32_t command_size;
 	uint32_t throttle_us;
 	uint64_t fence_rep;
-	 uint32_t version;
-	 uint32_t flags;
+	uint32_t version;
+	uint32_t flags;
 };
 
 /**
  * struct drm_vmw_fence_rep
  *
- * @fence_seq: Fence sequence associated with a command submission.
+ * @handle: Fence object handle for fence associated with a command submission.
+ * @mask: Fence flags relevant for this fence object.
+ * @seqno: Fence sequence number in fifo. A fence object with a lower
+ * seqno will signal the EXEC flag before a fence object with a higher
+ * seqno. This can be used by user-space to avoid kernel calls to determine
+ * whether a fence has signaled the EXEC flag. Note that @seqno will
+ * wrap at 32-bit.
+ * @passed_seqno: The highest seqno number processed by the hardware
+ * so far. This can be used to mark user-space fence objects as signaled, and
+ * to determine whether a fence seqno might be stale.
  * @error: This member should've been set to -EFAULT on submission.
  * The following actions should be take on completion:
  * error == -EFAULT: Fence communication failed. The host is synchronized.
@@ -342,9 +308,12 @@
  */
 
 struct drm_vmw_fence_rep {
-	uint64_t fence_seq;
-	int32_t error;
+	uint32_t handle;
+	uint32_t mask;
+	uint32_t seqno;
+	uint32_t passed_seqno;
 	uint32_t pad64;
+	int32_t error;
 };
 
 /*************************************************************************/
@@ -435,39 +404,6 @@
 
 /*************************************************************************/
 /**
- * DRM_VMW_FIFO_DEBUG - Get last FIFO submission.
- *
- * This IOCTL copies the last FIFO submission directly out of the FIFO buffer.
- */
-
-/**
- * struct drm_vmw_fifo_debug_arg
- *
- * @debug_buffer: User space address of a debug_buffer cast to an uint64_t //In
- * @debug_buffer_size: Size in bytes of debug buffer //In
- * @used_size: Number of bytes copied to the buffer // Out
- * @did_not_fit: Boolean indicating that the fifo contents did not fit. //Out
- *
- * Argument to the DRM_VMW_FIFO_DEBUG Ioctl.
- */
-
-struct drm_vmw_fifo_debug_arg {
-	uint64_t debug_buffer;
-	uint32_t debug_buffer_size;
-	uint32_t used_size;
-	int32_t did_not_fit;
-	uint32_t pad64;
-};
-
-struct drm_vmw_fence_wait_arg {
-	uint64_t sequence;
-	uint64_t kernel_cookie;
-	int32_t cookie_valid;
-	int32_t pad64;
-};
-
-/*************************************************************************/
-/**
  * DRM_VMW_CONTROL_STREAM - Control overlays, aka streams.
  *
  * This IOCTL controls the overlay units of the svga device.
@@ -590,6 +526,30 @@
 
 /*************************************************************************/
 /**
+ * DRM_VMW_GET_3D_CAP
+ *
+ * Read 3D capabilities from the FIFO
+ *
+ */
+
+/**
+ * struct drm_vmw_get_3d_cap_arg
+ *
+ * @buffer: Pointer to a buffer for capability data, cast to an uint64_t
+ * @size: Max size to copy
+ *
+ * Input argument to the DRM_VMW_GET_3D_CAP_IOCTL
+ * ioctls.
+ */
+
+struct drm_vmw_get_3d_cap_arg {
+	uint64_t buffer;
+	uint32_t max_size;
+	uint32_t pad64;
+};
+
+/*************************************************************************/
+/**
  * DRM_VMW_UPDATE_LAYOUT - Update layout
  *
  * Updates the preferred modes and connection status for connectors. The
@@ -612,4 +572,114 @@
 	uint64_t rects;
 };
 
+
+/*************************************************************************/
+/**
+ * DRM_VMW_FENCE_WAIT
+ *
+ * Waits for a fence object to signal. The wait is interruptible, so that
+ * signals may be delivered during the interrupt. The wait may timeout,
+ * in which case the calls returns -EBUSY. If the wait is restarted,
+ * that is restarting without resetting @cookie_valid to zero,
+ * the timeout is computed from the first call.
+ *
+ * The flags argument to the DRM_VMW_FENCE_WAIT ioctl indicates what to wait
+ * on:
+ * DRM_VMW_FENCE_FLAG_EXEC: All commands ahead of the fence in the command
+ * stream
+ * have executed.
+ * DRM_VMW_FENCE_FLAG_QUERY: All query results resulting from query finish
+ * commands
+ * in the buffer given to the EXECBUF ioctl returning the fence object handle
+ * are available to user-space.
+ *
+ * DRM_VMW_WAIT_OPTION_UNREF: If this wait option is given, and the
+ * fenc wait ioctl returns 0, the fence object has been unreferenced after
+ * the wait.
+ */
+
+#define DRM_VMW_FENCE_FLAG_EXEC   (1 << 0)
+#define DRM_VMW_FENCE_FLAG_QUERY  (1 << 1)
+
+#define DRM_VMW_WAIT_OPTION_UNREF (1 << 0)
+
+/**
+ * struct drm_vmw_fence_wait_arg
+ *
+ * @handle: Fence object handle as returned by the DRM_VMW_EXECBUF ioctl.
+ * @cookie_valid: Must be reset to 0 on first call. Left alone on restart.
+ * @kernel_cookie: Set to 0 on first call. Left alone on restart.
+ * @timeout_us: Wait timeout in microseconds. 0 for indefinite timeout.
+ * @lazy: Set to 1 if timing is not critical. Allow more than a kernel tick
+ * before returning.
+ * @flags: Fence flags to wait on.
+ * @wait_options: Options that control the behaviour of the wait ioctl.
+ *
+ * Input argument to the DRM_VMW_FENCE_WAIT ioctl.
+ */
+
+struct drm_vmw_fence_wait_arg {
+	uint32_t handle;
+	int32_t  cookie_valid;
+	uint64_t kernel_cookie;
+	uint64_t timeout_us;
+	int32_t lazy;
+	int32_t flags;
+	int32_t wait_options;
+	int32_t pad64;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_FENCE_SIGNALED
+ *
+ * Checks if a fence object is signaled..
+ */
+
+/**
+ * struct drm_vmw_fence_signaled_arg
+ *
+ * @handle: Fence object handle as returned by the DRM_VMW_EXECBUF ioctl.
+ * @flags: Fence object flags input to DRM_VMW_FENCE_SIGNALED ioctl
+ * @signaled: Out: Flags signaled.
+ * @sequence: Out: Highest sequence passed so far. Can be used to signal the
+ * EXEC flag of user-space fence objects.
+ *
+ * Input/Output argument to the DRM_VMW_FENCE_SIGNALED and DRM_VMW_FENCE_UNREF
+ * ioctls.
+ */
+
+struct drm_vmw_fence_signaled_arg {
+	 uint32_t handle;
+	 uint32_t flags;
+	 int32_t signaled;
+	 uint32_t passed_seqno;
+	 uint32_t signaled_flags;
+	 uint32_t pad64;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_FENCE_UNREF
+ *
+ * Unreferences a fence object, and causes it to be destroyed if there are no
+ * other references to it.
+ *
+ */
+
+/**
+ * struct drm_vmw_fence_arg
+ *
+ * @handle: Fence object handle as returned by the DRM_VMW_EXECBUF ioctl.
+ *
+ * Input/Output argument to the DRM_VMW_FENCE_UNREF ioctl..
+ */
+
+struct drm_vmw_fence_arg {
+	 uint32_t handle;
+	 uint32_t pad64;
+};
+
+
+
 #endif