Merge branch 'drm-next-3.19' of git://people.freedesktop.org/~agd5f/linux into drm-next

- More CI dpm fixes
- Initial DPM fan control for SI/CI (disabled by default)
- GPUVM multi-ring efficiency improvements
- Some cursor fixes

* 'drm-next-3.19' of git://people.freedesktop.org/~agd5f/linux: (22 commits)
  drm/radeon: update the VM after setting BO address v4
  drm/radeon: sync PT updates as shared v2
  drm/radeon: sync PD updates as shared
  drm/radeon: fence BO_VAs manually
  drm/radeon: use one VMID for each ring
  drm/radeon: track VM update fences separately
  drm/radeon: fence PT updates manually v2
  drm/radeon: split semaphore and sync object handling v2
  drm/radeon: remove unnecessary VM syncs
  drm/radeon: stop re-reserving the BO in radeon_vm_bo_set_addr
  drm/radeon: rework vm_flush parameters
  drm/radeon/ci: disable needless sclk changes
  drm/radeon/ci: force pcie level before sclk and mclk
  drm/radeon/ci: use different smc command for pcie dpm
  drm/radeon/ci: apply disp voltage changes before clk changes
  drm/radeon: fix PCC debugging message for CI DPM
  drm/radeon/dpm: add thermal dpm support for CI
  drm/radeon/dpm: add smc fan control for CI (v2)
  drm/radeon/dpm: add smc fan control for SI (v2)
  drm/radeon: work around a hw bug in MGCG on CIK
  ...
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index bad6caa..12bc212 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -80,7 +80,8 @@
 	r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
-	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o
+	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \
+	radeon_sync.o
 
 # add async DMA block
 radeon-y += \
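
radeon_sync.o is new: it carries the fence-collection logic split out of
the semaphore code (the struct and prototypes are in the radeon.h hunk
further down). Every copy function converted below follows the same
pattern; a minimal sketch, with error handling trimmed and the ring/resv
setup taken from the surrounding context:

	struct radeon_sync sync;
	struct radeon_fence *fence;

	radeon_sync_create(&sync);	/* plain init, cannot fail */

	/* with the ring locked: collect dependencies, then emit waits */
	radeon_sync_resv(rdev, &sync, resv, false);	/* fences from the BO reservation */
	radeon_sync_rings(rdev, &sync, ring->idx);	/* semaphore waits for foreign rings */

	/* ... emit copy packets and a fence on ring->idx ... */

	radeon_sync_free(rdev, &sync, fence);	/* semaphores stay alive until fence signals */

Because radeon_sync_create() replaces the old, allocating
radeon_semaphore_create(), the early ERR_PTR return on allocation failure
disappears from each converted function.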
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 9dbc52f..3f898d0 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -184,6 +184,9 @@
 				       u32 target_tdp);
 static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate);
 
+static PPSMC_Result ci_send_msg_to_smc_with_parameter(struct radeon_device *rdev,
+						      PPSMC_Msg msg, u32 parameter);
+
 static struct ci_power_info *ci_get_pi(struct radeon_device *rdev)
 {
         struct ci_power_info *pi = rdev->pm.dpm.priv;
@@ -355,6 +358,21 @@
 	return 0;
 }
 
+static int ci_populate_fuzzy_fan(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if ((rdev->pm.dpm.fan.fan_output_sensitivity & (1 << 15)) ||
+	    (rdev->pm.dpm.fan.fan_output_sensitivity == 0))
+		rdev->pm.dpm.fan.fan_output_sensitivity =
+			rdev->pm.dpm.fan.default_fan_output_sensitivity;
+
+	pi->smc_powertune_table.FuzzyFan_PwmSetDelta =
+		cpu_to_be16(rdev->pm.dpm.fan.fan_output_sensitivity);
+
+	return 0;
+}
+
 static int ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(struct radeon_device *rdev)
 {
 	struct ci_power_info *pi = ci_get_pi(rdev);
@@ -480,6 +498,9 @@
 		ret = ci_populate_dw8(rdev);
 		if (ret)
 			return ret;
+		ret = ci_populate_fuzzy_fan(rdev);
+		if (ret)
+			return ret;
 		ret = ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(rdev);
 		if (ret)
 			return ret;
@@ -693,6 +714,25 @@
 	return ret;
 }
 
+static int ci_enable_thermal_based_sclk_dpm(struct radeon_device *rdev,
+					    bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result smc_result = PPSMC_Result_OK;
+
+	if (pi->thermal_sclk_dpm_enabled) {
+		if (enable)
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_ENABLE_THERMAL_DPM);
+		else
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_DISABLE_THERMAL_DPM);
+	}
+
+	if (smc_result == PPSMC_Result_OK)
+		return 0;
+	else
+		return -EINVAL;
+}
+
 static int ci_power_control_set_level(struct radeon_device *rdev)
 {
 	struct ci_power_info *pi = ci_get_pi(rdev);
@@ -859,6 +899,7 @@
 
 	if (enable) {
 		thermal_int &= ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
+		WREG32_SMC(CG_THERMAL_INT, thermal_int);
 		rdev->irq.dpm_thermal = false;
 		result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Thermal_Cntl_Enable);
 		if (result != PPSMC_Result_OK) {
@@ -867,6 +908,7 @@
 		}
 	} else {
 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+		WREG32_SMC(CG_THERMAL_INT, thermal_int);
 		rdev->irq.dpm_thermal = true;
 		result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Thermal_Cntl_Disable);
 		if (result != PPSMC_Result_OK) {
@@ -875,11 +917,324 @@
 		}
 	}
 
-	WREG32_SMC(CG_THERMAL_INT, thermal_int);
+	return 0;
+}
+
+static void ci_fan_ctrl_set_static_mode(struct radeon_device *rdev, u32 mode)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 tmp;
+
+	if (pi->fan_ctrl_is_in_default_mode) {
+		tmp = (RREG32_SMC(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK) >> FDO_PWM_MODE_SHIFT;
+		pi->fan_ctrl_default_mode = tmp;
+		tmp = (RREG32_SMC(CG_FDO_CTRL2) & TMIN_MASK) >> TMIN_SHIFT;
+		pi->t_min = tmp;
+		pi->fan_ctrl_is_in_default_mode = false;
+	}
+
+	tmp = RREG32_SMC(CG_FDO_CTRL2) & ~TMIN_MASK;
+	tmp |= TMIN(0);
+	WREG32_SMC(CG_FDO_CTRL2, tmp);
+
+	tmp = RREG32_SMC(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK;
+	tmp |= FDO_PWM_MODE(mode);
+	WREG32_SMC(CG_FDO_CTRL2, tmp);
+}
+
+static int ci_thermal_setup_fan_table(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	SMU7_Discrete_FanTable fan_table = { FDO_MODE_HARDWARE };
+	u32 duty100;
+	u32 t_diff1, t_diff2, pwm_diff1, pwm_diff2;
+	u16 fdo_min, slope1, slope2;
+	u32 reference_clock, tmp;
+	int ret;
+	u64 tmp64;
+
+	if (!pi->fan_table_start) {
+		rdev->pm.dpm.fan.ucode_fan_control = false;
+		return 0;
+	}
+
+	duty100 = (RREG32_SMC(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+
+	if (duty100 == 0) {
+		rdev->pm.dpm.fan.ucode_fan_control = false;
+		return 0;
+	}
+
+	tmp64 = (u64)rdev->pm.dpm.fan.pwm_min * duty100;
+	do_div(tmp64, 10000);
+	fdo_min = (u16)tmp64;
+
+	t_diff1 = rdev->pm.dpm.fan.t_med - rdev->pm.dpm.fan.t_min;
+	t_diff2 = rdev->pm.dpm.fan.t_high - rdev->pm.dpm.fan.t_med;
+
+	pwm_diff1 = rdev->pm.dpm.fan.pwm_med - rdev->pm.dpm.fan.pwm_min;
+	pwm_diff2 = rdev->pm.dpm.fan.pwm_high - rdev->pm.dpm.fan.pwm_med;
+
+	slope1 = (u16)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100);
+	slope2 = (u16)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100);
+
+	fan_table.TempMin = cpu_to_be16((50 + rdev->pm.dpm.fan.t_min) / 100);
+	fan_table.TempMed = cpu_to_be16((50 + rdev->pm.dpm.fan.t_med) / 100);
+	fan_table.TempMax = cpu_to_be16((50 + rdev->pm.dpm.fan.t_max) / 100);
+
+	fan_table.Slope1 = cpu_to_be16(slope1);
+	fan_table.Slope2 = cpu_to_be16(slope2);
+
+	fan_table.FdoMin = cpu_to_be16(fdo_min);
+
+	fan_table.HystDown = cpu_to_be16(rdev->pm.dpm.fan.t_hyst);
+
+	fan_table.HystUp = cpu_to_be16(1);
+
+	fan_table.HystSlope = cpu_to_be16(1);
+
+	fan_table.TempRespLim = cpu_to_be16(5);
+
+	reference_clock = radeon_get_xclk(rdev);
+
+	fan_table.RefreshPeriod = cpu_to_be32((rdev->pm.dpm.fan.cycle_delay *
+					       reference_clock) / 1600);
+
+	fan_table.FdoMax = cpu_to_be16((u16)duty100);
+
+	tmp = (RREG32_SMC(CG_MULT_THERMAL_CTRL) & TEMP_SEL_MASK) >> TEMP_SEL_SHIFT;
+	fan_table.TempSrc = (uint8_t)tmp;
+
+	ret = ci_copy_bytes_to_smc(rdev,
+				   pi->fan_table_start,
+				   (u8 *)(&fan_table),
+				   sizeof(fan_table),
+				   pi->sram_end);
+
+	if (ret) {
+		DRM_ERROR("Failed to load fan table to the SMC.");
+		rdev->pm.dpm.fan.ucode_fan_control = false;
+	}
 
 	return 0;
 }
 
+static int ci_fan_ctrl_start_smc_fan_control(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result ret;
+
+	if (pi->caps_od_fuzzy_fan_control_support) {
+		ret = ci_send_msg_to_smc_with_parameter(rdev,
+							PPSMC_StartFanControl,
+							FAN_CONTROL_FUZZY);
+		if (ret != PPSMC_Result_OK)
+			return -EINVAL;
+		ret = ci_send_msg_to_smc_with_parameter(rdev,
+							PPSMC_MSG_SetFanPwmMax,
+							rdev->pm.dpm.fan.default_max_fan_pwm);
+		if (ret != PPSMC_Result_OK)
+			return -EINVAL;
+	} else {
+		ret = ci_send_msg_to_smc_with_parameter(rdev,
+							PPSMC_StartFanControl,
+							FAN_CONTROL_TABLE);
+		if (ret != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+#if 0
+static int ci_fan_ctrl_stop_smc_fan_control(struct radeon_device *rdev)
+{
+	PPSMC_Result ret;
+
+	ret = ci_send_msg_to_smc(rdev, PPSMC_StopFanControl);
+	if (ret == PPSMC_Result_OK)
+		return 0;
+	else
+		return -EINVAL;
+}
+
+static int ci_fan_ctrl_get_fan_speed_percent(struct radeon_device *rdev,
+					     u32 *speed)
+{
+	u32 duty, duty100;
+	u64 tmp64;
+
+	if (rdev->pm.no_fan)
+		return -ENOENT;
+
+	duty100 = (RREG32_SMC(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+	duty = (RREG32_SMC(CG_THERMAL_STATUS) & FDO_PWM_DUTY_MASK) >> FDO_PWM_DUTY_SHIFT;
+
+	if (duty100 == 0)
+		return -EINVAL;
+
+	tmp64 = (u64)duty * 100;
+	do_div(tmp64, duty100);
+	*speed = (u32)tmp64;
+
+	if (*speed > 100)
+		*speed = 100;
+
+	return 0;
+}
+
+static int ci_fan_ctrl_set_fan_speed_percent(struct radeon_device *rdev,
+					     u32 speed)
+{
+	u32 tmp;
+	u32 duty, duty100;
+	u64 tmp64;
+
+	if (rdev->pm.no_fan)
+		return -ENOENT;
+
+	if (speed > 100)
+		return -EINVAL;
+
+	if (rdev->pm.dpm.fan.ucode_fan_control)
+		ci_fan_ctrl_stop_smc_fan_control(rdev);
+
+	duty100 = (RREG32_SMC(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+
+	if (duty100 == 0)
+		return -EINVAL;
+
+	tmp64 = (u64)speed * duty100;
+	do_div(tmp64, 100);
+	duty = (u32)tmp64;
+
+	tmp = RREG32_SMC(CG_FDO_CTRL0) & ~FDO_STATIC_DUTY_MASK;
+	tmp |= FDO_STATIC_DUTY(duty);
+	WREG32_SMC(CG_FDO_CTRL0, tmp);
+
+	ci_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+
+	return 0;
+}
+
+static int ci_fan_ctrl_get_fan_speed_rpm(struct radeon_device *rdev,
+					 u32 *speed)
+{
+	u32 tach_period;
+	u32 xclk = radeon_get_xclk(rdev);
+
+	if (rdev->pm.no_fan)
+		return -ENOENT;
+
+	if (rdev->pm.fan_pulses_per_revolution == 0)
+		return -ENOENT;
+
+	tach_period = (RREG32_SMC(CG_TACH_STATUS) & TACH_PERIOD_MASK) >> TACH_PERIOD_SHIFT;
+	if (tach_period == 0)
+		return -ENOENT;
+
+	*speed = 60 * xclk * 10000 / tach_period;
+
+	return 0;
+}
+
+static int ci_fan_ctrl_set_fan_speed_rpm(struct radeon_device *rdev,
+					 u32 speed)
+{
+	u32 tach_period, tmp;
+	u32 xclk = radeon_get_xclk(rdev);
+
+	if (rdev->pm.no_fan)
+		return -ENOENT;
+
+	if (rdev->pm.fan_pulses_per_revolution == 0)
+		return -ENOENT;
+
+	if ((speed < rdev->pm.fan_min_rpm) ||
+	    (speed > rdev->pm.fan_max_rpm))
+		return -EINVAL;
+
+	if (rdev->pm.dpm.fan.ucode_fan_control)
+		ci_fan_ctrl_stop_smc_fan_control(rdev);
+
+	tach_period = 60 * xclk * 10000 / (8 * speed);
+	tmp = RREG32_SMC(CG_TACH_CTRL) & ~TARGET_PERIOD_MASK;
+	tmp |= TARGET_PERIOD(tach_period);
+	WREG32_SMC(CG_TACH_CTRL, tmp);
+
+	ci_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+
+	return 0;
+}
+#endif
+
+static void ci_fan_ctrl_set_default_mode(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 tmp;
+
+	if (!pi->fan_ctrl_is_in_default_mode) {
+		tmp = RREG32_SMC(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK;
+		tmp |= FDO_PWM_MODE(pi->fan_ctrl_default_mode);
+		WREG32_SMC(CG_FDO_CTRL2, tmp);
+
+		tmp = RREG32_SMC(CG_FDO_CTRL2) & ~TMIN_MASK;
+		tmp |= TMIN(pi->t_min);
+		WREG32_SMC(CG_FDO_CTRL2, tmp);
+		pi->fan_ctrl_is_in_default_mode = true;
+	}
+}
+
+static void ci_thermal_start_smc_fan_control(struct radeon_device *rdev)
+{
+	if (rdev->pm.dpm.fan.ucode_fan_control) {
+		ci_fan_ctrl_start_smc_fan_control(rdev);
+		ci_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+	}
+}
+
+static void ci_thermal_initialize(struct radeon_device *rdev)
+{
+	u32 tmp;
+
+	if (rdev->pm.fan_pulses_per_revolution) {
+		tmp = RREG32_SMC(CG_TACH_CTRL) & ~EDGE_PER_REV_MASK;
+		tmp |= EDGE_PER_REV(rdev->pm.fan_pulses_per_revolution - 1);
+		WREG32_SMC(CG_TACH_CTRL, tmp);
+	}
+
+	tmp = RREG32_SMC(CG_FDO_CTRL2) & ~TACH_PWM_RESP_RATE_MASK;
+	tmp |= TACH_PWM_RESP_RATE(0x28);
+	WREG32_SMC(CG_FDO_CTRL2, tmp);
+}
+
+static int ci_thermal_start_thermal_controller(struct radeon_device *rdev)
+{
+	int ret;
+
+	ci_thermal_initialize(rdev);
+	ret = ci_thermal_set_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+	if (ret)
+		return ret;
+	ret = ci_thermal_enable_alert(rdev, true);
+	if (ret)
+		return ret;
+	if (rdev->pm.dpm.fan.ucode_fan_control) {
+		ret = ci_thermal_setup_fan_table(rdev);
+		if (ret)
+			return ret;
+		ci_thermal_start_smc_fan_control(rdev);
+	}
+
+	return 0;
+}
+
+static void ci_thermal_stop_thermal_controller(struct radeon_device *rdev)
+{
+	if (!rdev->pm.no_fan)
+		ci_fan_ctrl_set_default_mode(rdev);
+}
+
 #if 0
 static int ci_read_smc_soft_register(struct radeon_device *rdev,
 				     u16 reg_offset, u32 *value)
@@ -3397,6 +3752,8 @@
 	struct ci_power_info *pi = ci_get_pi(rdev);
 	PPSMC_Result result;
 
+	ci_apply_disp_minimum_voltage_request(rdev);
+
 	if (!pi->sclk_dpm_key_disabled) {
 		if (pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
 			result = ci_send_msg_to_smc_with_parameter(rdev,
@@ -3416,7 +3773,7 @@
 				return -EINVAL;
 		}
 	}
-
+#if 0
 	if (!pi->pcie_dpm_key_disabled) {
 		if (pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
 			result = ci_send_msg_to_smc_with_parameter(rdev,
@@ -3426,9 +3783,7 @@
 				return -EINVAL;
 		}
 	}
-
-	ci_apply_disp_minimum_voltage_request(rdev);
-
+#endif
 	return 0;
 }
 
@@ -3454,7 +3809,7 @@
 		pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
 	} else {
 		/* XXX check display min clock requirements */
-		if (0 != CISLAND_MINIMUM_ENGINE_CLOCK)
+		if (CISLAND_MINIMUM_ENGINE_CLOCK != CISLAND_MINIMUM_ENGINE_CLOCK)
 			pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_SCLK;
 	}
 
@@ -3788,6 +4143,25 @@
 	int ret;
 
 	if (level == RADEON_DPM_FORCED_LEVEL_HIGH) {
+		if ((!pi->pcie_dpm_key_disabled) &&
+		    pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
+			levels = 0;
+			tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
+			while (tmp >>= 1)
+				levels++;
+			if (levels) {
+				ret = ci_dpm_force_state_pcie(rdev, level);
+				if (ret)
+					return ret;
+				for (i = 0; i < rdev->usec_timeout; i++) {
+					tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX_1) &
+					       CURR_PCIE_INDEX_MASK) >> CURR_PCIE_INDEX_SHIFT;
+					if (tmp == levels)
+						break;
+					udelay(1);
+				}
+			}
+		}
 		if ((!pi->sclk_dpm_key_disabled) &&
 		    pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
 			levels = 0;
@@ -3826,25 +4200,6 @@
 				}
 			}
 		}
-		if ((!pi->pcie_dpm_key_disabled) &&
-		    pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
-			levels = 0;
-			tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
-			while (tmp >>= 1)
-				levels++;
-			if (levels) {
-				ret = ci_dpm_force_state_pcie(rdev, level);
-				if (ret)
-					return ret;
-				for (i = 0; i < rdev->usec_timeout; i++) {
-					tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX_1) &
-					       CURR_PCIE_INDEX_MASK) >> CURR_PCIE_INDEX_SHIFT;
-					if (tmp == levels)
-						break;
-					udelay(1);
-				}
-			}
-		}
 	} else if (level == RADEON_DPM_FORCED_LEVEL_LOW) {
 		if ((!pi->sclk_dpm_key_disabled) &&
 		    pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
@@ -3892,6 +4247,14 @@
 			}
 		}
 	} else if (level == RADEON_DPM_FORCED_LEVEL_AUTO) {
+		if (!pi->pcie_dpm_key_disabled) {
+			PPSMC_Result smc_result;
+
+			smc_result = ci_send_msg_to_smc(rdev,
+							PPSMC_MSG_PCIeDPM_UnForceLevel);
+			if (smc_result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
 		ret = ci_upload_dpm_level_enable_mask(rdev);
 		if (ret)
 			return ret;
@@ -4841,6 +5204,14 @@
 
 	ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, true);
 
+	ret = ci_enable_thermal_based_sclk_dpm(rdev, true);
+	if (ret) {
+		DRM_ERROR("ci_enable_thermal_based_sclk_dpm failed\n");
+		return ret;
+	}
+
+	ci_thermal_start_thermal_controller(rdev);
+
 	ci_update_current_ps(rdev, boot_ps);
 
 	return 0;
@@ -4886,6 +5257,8 @@
 	if (!ci_is_smc_running(rdev))
 		return;
 
+	ci_thermal_stop_thermal_controller(rdev);
+
 	if (pi->thermal_protection)
 		ci_enable_thermal_protection(rdev, false);
 	ci_enable_power_containment(rdev, false);
@@ -4900,6 +5273,7 @@
 	ci_reset_to_default(rdev);
 	ci_dpm_stop_smc(rdev);
 	ci_force_switch_to_arb_f0(rdev);
+	ci_enable_thermal_based_sclk_dpm(rdev, false);
 
 	ci_update_current_ps(rdev, boot_ps);
 }
@@ -5299,6 +5673,7 @@
 	pi->sclk_dpm_key_disabled = 0;
 	pi->mclk_dpm_key_disabled = 0;
 	pi->pcie_dpm_key_disabled = 0;
+	pi->thermal_sclk_dpm_enabled = 0;
 
 	/* mclk dpm is unstable on some R7 260X cards with the old mc ucode */
 	if ((rdev->pdev->device == 0x6658) &&
@@ -5406,7 +5781,7 @@
 			tmp |= DPM_ENABLED;
 			break;
 		default:
-			DRM_ERROR("Invalid PCC GPIO!");
+			DRM_ERROR("Invalid PCC GPIO: %u!\n", gpio.shift);
 			break;
 		}
 		WREG32_SMC(CNB_PWRMGT_CNTL, tmp);
@@ -5473,6 +5848,9 @@
 		rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc =
 			rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
 
+	pi->fan_ctrl_is_in_default_mode = true;
+	rdev->pm.dpm.fan.ucode_fan_control = false;
+
 	return 0;
 }
 
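A note on the fixed-point math in ci_thermal_setup_fan_table() above: the
power-table temperatures are in 0.01 degC units (see the 10900 == 109 degC
default in the r600_dpm.c hunk below), so (50 + t) / 100 rounds to the
nearest whole degree, and the slopes are pre-scaled by 16 for the SMC. A
worked example, assuming the pwm_* values are in 0.01% units as the
fdo_min conversion (pwm_min * duty100 / 10000) implies:

	/* t_min = 2000 (20 degC), t_med = 4500 (45 degC),
	 * pwm_min = 3000 (30%), pwm_med = 4000 (40%), duty100 = 255 */
	t_diff1   = 4500 - 2000;			/* 2500 */
	pwm_diff1 = 4000 - 3000;			/* 1000 */
	slope1 = (50 + (16 * 255 * 1000) / 2500) / 100;	/* (50 + 1632) / 100 = 16 */

That is 10% PWM over 25 degC, about one duty step per degree, expressed
in 1/16th-step units for the firmware.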
diff --git a/drivers/gpu/drm/radeon/ci_dpm.h b/drivers/gpu/drm/radeon/ci_dpm.h
index 615cb2c..84e3d3b 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.h
+++ b/drivers/gpu/drm/radeon/ci_dpm.h
@@ -239,6 +239,7 @@
 	u32 sclk_dpm_key_disabled;
 	u32 mclk_dpm_key_disabled;
 	u32 pcie_dpm_key_disabled;
+	u32 thermal_sclk_dpm_enabled;
 	struct ci_pcie_perf_range pcie_gen_performance;
 	struct ci_pcie_perf_range pcie_lane_performance;
 	struct ci_pcie_perf_range pcie_gen_powersaving;
@@ -266,6 +267,7 @@
 	bool caps_automatic_dc_transition;
 	bool caps_sclk_throttle_low_notification;
 	bool caps_dynamic_ac_timing;
+	bool caps_od_fuzzy_fan_control_support;
 	/* flags */
 	bool thermal_protection;
 	bool pcie_performance_request;
@@ -287,6 +289,10 @@
 	struct ci_ps current_ps;
 	struct radeon_ps requested_rps;
 	struct ci_ps requested_ps;
+	/* fan control */
+	bool fan_ctrl_is_in_default_mode;
+	u32 t_min;
+	u32 fan_ctrl_default_mode;
 };
 
 #define CISLANDS_VOLTAGE_CONTROL_NONE                   0x0
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index bce73b6..3deeed3 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3994,31 +3994,27 @@
 				    unsigned num_gpu_pages,
 				    struct reservation_object *resv)
 {
-	struct radeon_semaphore *sem = NULL;
 	struct radeon_fence *fence;
+	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.blit_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_bytes, cur_size_in_bytes, control;
 	int i, num_loops;
 	int r = 0;
 
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return ERR_PTR(r);
-	}
+	radeon_sync_create(&sync);
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_resv(rdev, sem, resv, false);
-	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
 		cur_size_in_bytes = size_in_bytes;
@@ -4042,12 +4038,12 @@
 	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, fence);
+	radeon_sync_free(rdev, &sync, fence);
 
 	return fence;
 }
@@ -4070,6 +4066,7 @@
 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
 	u32 header, control = INDIRECT_BUFFER_VALID;
 
 	if (ib->is_const_ib) {
@@ -4098,8 +4095,7 @@
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 	}
 
-	control |= ib->length_dw |
-		(ib->vm ? (ib->vm->id << 24) : 0);
+	control |= ib->length_dw | (vm_id << 24);
 
 	radeon_ring_write(ring, header);
 	radeon_ring_write(ring,
@@ -5982,26 +5978,23 @@
  * Update the page table base and flush the VM TLB
  * using the CP (CIK).
  */
-void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		  unsigned vm_id, uint64_t pd_addr)
 {
-	struct radeon_ring *ring = &rdev->ring[ridx];
-	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
-
-	if (vm == NULL)
-		return;
+	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
 
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)));
-	if (vm->id < 8) {
+	if (vm_id < 8) {
 		radeon_ring_write(ring,
-				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
 	} else {
 		radeon_ring_write(ring,
-				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
 	}
 	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+	radeon_ring_write(ring, pd_addr >> 12);
 
 	/* update SH_MEM_* regs */
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -6009,7 +6002,7 @@
 				 WRITE_DATA_DST_SEL(0)));
 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
 	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, VMID(vm->id));
+	radeon_ring_write(ring, VMID(vm_id));
 
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -6030,7 +6023,7 @@
 	radeon_ring_write(ring, VMID(0));
 
 	/* HDP flush */
-	cik_hdp_flush_cp_ring_emit(rdev, ridx);
+	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
 
 	/* bits 0-15 are the VM contexts0-15 */
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -6038,7 +6031,7 @@
 				 WRITE_DATA_DST_SEL(0)));
 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
 	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 1 << vm->id);
+	radeon_ring_write(ring, 1 << vm_id);
 
 	/* compute doesn't have PFP */
 	if (usepfp) {
@@ -6344,6 +6337,7 @@
 		}
 
 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+		data |= 0x00000001;
 		data &= 0xfffffffd;
 		if (orig != data)
 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
@@ -6377,7 +6371,7 @@
 		}
 	} else {
 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
-		data |= 0x00000002;
+		data |= 0x00000003;
 		if (orig != data)
 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
 
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 4e8432d..54b9837 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -134,7 +134,7 @@
 			      struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
-	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
+	u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
 
 	if (rdev->wb.enabled) {
 		u32 next_rptr = ring->wptr + 5;
@@ -541,31 +541,27 @@
 				  unsigned num_gpu_pages,
 				  struct reservation_object *resv)
 {
-	struct radeon_semaphore *sem = NULL;
 	struct radeon_fence *fence;
+	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_bytes, cur_size_in_bytes;
 	int i, num_loops;
 	int r = 0;
 
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return ERR_PTR(r);
-	}
+	radeon_sync_create(&sync);
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_resv(rdev, sem, resv, false);
-	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
 		cur_size_in_bytes = size_in_bytes;
@@ -586,12 +582,12 @@
 	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, fence);
+	radeon_sync_free(rdev, &sync, fence);
 
 	return fence;
 }
@@ -901,25 +897,21 @@
  * Update the page table base and flush the VM TLB
  * using sDMA (CIK).
  */
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		      unsigned vm_id, uint64_t pd_addr)
 {
-	struct radeon_ring *ring = &rdev->ring[ridx];
-
-	if (vm == NULL)
-		return;
-
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	if (vm->id < 8) {
-		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+	if (vm_id < 8) {
+		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
 	} else {
-		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
 	}
-	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+	radeon_ring_write(ring, pd_addr >> 12);
 
 	/* update SH_MEM_* regs */
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-	radeon_ring_write(ring, VMID(vm->id));
+	radeon_ring_write(ring, VMID(vm_id));
 
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
@@ -942,11 +934,11 @@
 	radeon_ring_write(ring, VMID(0));
 
 	/* flush HDP */
-	cik_sdma_hdp_flush_ring_emit(rdev, ridx);
+	cik_sdma_hdp_flush_ring_emit(rdev, ring->idx);
 
 	/* flush TLB */
 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
-	radeon_ring_write(ring, 1 << vm->id);
+	radeon_ring_write(ring, 1 << vm_id);
 }
 
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h
index 068cbb0..e4e88ca 100644
--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@@ -186,7 +186,10 @@
 #define		DIG_THERM_DPM(x)			((x) << 14)
 #define		DIG_THERM_DPM_MASK			0x003FC000
 #define		DIG_THERM_DPM_SHIFT			14
-
+#define	CG_THERMAL_STATUS				0xC0300008
+#define		FDO_PWM_DUTY(x)				((x) << 9)
+#define		FDO_PWM_DUTY_MASK			(0xff << 9)
+#define		FDO_PWM_DUTY_SHIFT			9
 #define	CG_THERMAL_INT					0xC030000C
 #define		CI_DIG_THERM_INTH(x)			((x) << 8)
 #define		CI_DIG_THERM_INTH_MASK			0x0000FF00
@@ -196,7 +199,10 @@
 #define		CI_DIG_THERM_INTL_SHIFT			16
 #define 	THERM_INT_MASK_HIGH			(1 << 24)
 #define 	THERM_INT_MASK_LOW			(1 << 25)
-
+#define	CG_MULT_THERMAL_CTRL				0xC0300010
+#define		TEMP_SEL(x)				((x) << 20)
+#define		TEMP_SEL_MASK				(0xff << 20)
+#define		TEMP_SEL_SHIFT				20
 #define	CG_MULT_THERMAL_STATUS				0xC0300014
 #define		ASIC_MAX_TEMP(x)			((x) << 0)
 #define		ASIC_MAX_TEMP_MASK			0x000001ff
@@ -205,6 +211,36 @@
 #define		CTF_TEMP_MASK				0x0003fe00
 #define		CTF_TEMP_SHIFT				9
 
+#define	CG_FDO_CTRL0					0xC0300064
+#define		FDO_STATIC_DUTY(x)			((x) << 0)
+#define		FDO_STATIC_DUTY_MASK			0x000000FF
+#define		FDO_STATIC_DUTY_SHIFT			0
+#define	CG_FDO_CTRL1					0xC0300068
+#define		FMAX_DUTY100(x)				((x) << 0)
+#define		FMAX_DUTY100_MASK			0x000000FF
+#define		FMAX_DUTY100_SHIFT			0
+#define	CG_FDO_CTRL2					0xC030006C
+#define		TMIN(x)					((x) << 0)
+#define		TMIN_MASK				0x000000FF
+#define		TMIN_SHIFT				0
+#define		FDO_PWM_MODE(x)				((x) << 11)
+#define		FDO_PWM_MODE_MASK			(3 << 11)
+#define		FDO_PWM_MODE_SHIFT			11
+#define		TACH_PWM_RESP_RATE(x)			((x) << 25)
+#define		TACH_PWM_RESP_RATE_MASK			(0x7f << 25)
+#define		TACH_PWM_RESP_RATE_SHIFT		25
+#define CG_TACH_CTRL                                    0xC0300070
+#       define EDGE_PER_REV(x)                          ((x) << 0)
+#       define EDGE_PER_REV_MASK                        (0x7 << 0)
+#       define EDGE_PER_REV_SHIFT                       0
+#       define TARGET_PERIOD(x)                         ((x) << 3)
+#       define TARGET_PERIOD_MASK                       0xfffffff8
+#       define TARGET_PERIOD_SHIFT                      3
+#define CG_TACH_STATUS                                  0xC0300074
+#       define TACH_PERIOD(x)                           ((x) << 0)
+#       define TACH_PERIOD_MASK                         0xffffffff
+#       define TACH_PERIOD_SHIFT                        0
+
 #define CG_ECLK_CNTL                                    0xC05000AC
 #       define ECLK_DIVIDER_MASK                        0x7f
 #       define ECLK_DIR_CNTL_EN                         (1 << 8)
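The CG_TACH_* fields above feed the RPM helpers (currently under #if 0) in
the ci_dpm.c hunk: the tachometer period is measured in reference-clock
ticks scaled by 10000. A sketch of the set-side arithmetic, assuming a
27 MHz reference crystal reported by radeon_get_xclk() in 10 kHz units
(xclk == 2700; the units are an assumption of this example):

	u32 xclk = 2700;	/* 27 MHz in 10 kHz units (assumed) */
	u32 speed = 3000;	/* target rpm */
	/* ci_fan_ctrl_set_fan_speed_rpm(), with 8 tach edges per rev: */
	u32 tach_period = 60 * xclk * 10000 / (8 * speed);	/* 67500 */

Note that the intermediate 60 * xclk * 10000 must stay below 2^32; that
holds for 10 kHz units but would overflow if xclk were ever in Hz.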
diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
index 66bcfad..96535aa 100644
--- a/drivers/gpu/drm/radeon/evergreen_dma.c
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -110,31 +110,27 @@
 					unsigned num_gpu_pages,
 					struct reservation_object *resv)
 {
-	struct radeon_semaphore *sem = NULL;
 	struct radeon_fence *fence;
+	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_dw, cur_size_in_dw;
 	int i, num_loops;
 	int r = 0;
 
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return ERR_PTR(r);
-	}
+	radeon_sync_create(&sync);
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
 	num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_resv(rdev, sem, resv, false);
-	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
 		cur_size_in_dw = size_in_dw;
@@ -153,12 +149,12 @@
 	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, fence);
+	radeon_sync_free(rdev, &sync, fence);
 
 	return fence;
 }
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 3faee58..360de9f 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1373,6 +1373,7 @@
 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
 	u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
 		PACKET3_SH_ACTION_ENA;
 
@@ -1395,15 +1396,14 @@
 #endif
 			  (ib->gpu_addr & 0xFFFFFFFC));
 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
-	radeon_ring_write(ring, ib->length_dw | 
-			  (ib->vm ? (ib->vm->id << 24) : 0));
+	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
 
 	/* flush read cache over gart for this vmid */
 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
 	radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl);
 	radeon_ring_write(ring, 0xFFFFFFFF);
 	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */
+	radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */
 }
 
 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
@@ -2502,15 +2502,11 @@
  * Update the page table base and flush the VM TLB
  * using the CP (cayman-si).
  */
-void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		     unsigned vm_id, uint64_t pd_addr)
 {
-	struct radeon_ring *ring = &rdev->ring[ridx];
-
-	if (vm == NULL)
-		return;
-
-	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
-	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2), 0));
+	radeon_ring_write(ring, pd_addr >> 12);
 
 	/* flush hdp cache */
 	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
@@ -2518,7 +2514,7 @@
 
 	/* bits 0-7 are the VM contexts0-7 */
 	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
-	radeon_ring_write(ring, 1 << vm->id);
+	radeon_ring_write(ring, 1 << vm_id);
 
 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index f26f0a9..50f8861 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -123,6 +123,7 @@
 				struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
 
 	if (rdev->wb.enabled) {
 		u32 next_rptr = ring->wptr + 4;
@@ -140,7 +141,7 @@
 	 */
 	while ((ring->wptr & 7) != 5)
 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
 	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
 
@@ -446,16 +447,12 @@
 		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
 }
 
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+			 unsigned vm_id, uint64_t pd_addr)
 {
-	struct radeon_ring *ring = &rdev->ring[ridx];
-
-	if (vm == NULL)
-		return;
-
 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
-	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
-	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
+	radeon_ring_write(ring, pd_addr >> 12);
 
 	/* flush hdp cache */
 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
@@ -465,6 +462,6 @@
 	/* bits 0-7 are the VM contexts0-7 */
 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
 	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
-	radeon_ring_write(ring, 1 << vm->id);
+	radeon_ring_write(ring, 1 << vm_id);
 }
 
diff --git a/drivers/gpu/drm/radeon/ppsmc.h b/drivers/gpu/drm/radeon/ppsmc.h
index 11c0e4d..7e5724a 100644
--- a/drivers/gpu/drm/radeon/ppsmc.h
+++ b/drivers/gpu/drm/radeon/ppsmc.h
@@ -56,6 +56,14 @@
 #define PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE 0x20
 #define PPSMC_STATEFLAG_DEEPSLEEP_BYPASS   0x40
 
+#define FDO_MODE_HARDWARE 0
+#define FDO_MODE_PIECE_WISE_LINEAR 1
+
+enum FAN_CONTROL {
+	FAN_CONTROL_FUZZY,
+	FAN_CONTROL_TABLE
+};
+
 #define PPSMC_Result_OK             ((uint8_t)0x01)
 #define PPSMC_Result_Failed         ((uint8_t)0xFF)
 
@@ -79,6 +87,8 @@
 #define PPSMC_MSG_DisableCac                ((uint8_t)0x54)
 #define PPSMC_TDPClampingActive             ((uint8_t)0x59)
 #define PPSMC_TDPClampingInactive           ((uint8_t)0x5A)
+#define PPSMC_StartFanControl               ((uint8_t)0x5B)
+#define PPSMC_StopFanControl                ((uint8_t)0x5C)
 #define PPSMC_MSG_NoDisplay                 ((uint8_t)0x5D)
 #define PPSMC_MSG_HasDisplay                ((uint8_t)0x5E)
 #define PPSMC_MSG_UVDPowerOFF               ((uint8_t)0x60)
@@ -150,6 +160,10 @@
 #define PPSMC_MSG_MASTER_DeepSleep_ON         ((uint16_t) 0x18F)
 #define PPSMC_MSG_MASTER_DeepSleep_OFF        ((uint16_t) 0x190)
 #define PPSMC_MSG_Remove_DC_Clamp             ((uint16_t) 0x191)
+#define PPSMC_MSG_SetFanPwmMax                ((uint16_t) 0x19A)
+
+#define PPSMC_MSG_ENABLE_THERMAL_DPM          ((uint16_t) 0x19C)
+#define PPSMC_MSG_DISABLE_THERMAL_DPM         ((uint16_t) 0x19D)
 
 #define PPSMC_MSG_API_GetSclkFrequency        ((uint16_t) 0x200)
 #define PPSMC_MSG_API_GetMclkFrequency        ((uint16_t) 0x201)
diff --git a/drivers/gpu/drm/radeon/pptable.h b/drivers/gpu/drm/radeon/pptable.h
index 2d53299..4c2eec4 100644
--- a/drivers/gpu/drm/radeon/pptable.h
+++ b/drivers/gpu/drm/radeon/pptable.h
@@ -96,6 +96,14 @@
     USHORT  usTMax;                          // The max temperature
 } ATOM_PPLIB_FANTABLE2;
 
+typedef struct _ATOM_PPLIB_FANTABLE3
+{
+	ATOM_PPLIB_FANTABLE2 basicTable2;
+	UCHAR ucFanControlMode;
+	USHORT usFanPWMMax;
+	USHORT usFanOutputSensitivity;
+} ATOM_PPLIB_FANTABLE3;
+
 typedef struct _ATOM_PPLIB_EXTENDEDHEADER
 {
     USHORT  usSize;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 56b0292..ef5d606 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2889,31 +2889,27 @@
 				     unsigned num_gpu_pages,
 				     struct reservation_object *resv)
 {
-	struct radeon_semaphore *sem = NULL;
 	struct radeon_fence *fence;
+	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.blit_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_bytes, cur_size_in_bytes, tmp;
 	int i, num_loops;
 	int r = 0;
 
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return ERR_PTR(r);
-	}
+	radeon_sync_create(&sync);
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
 	r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_resv(rdev, sem, resv, false);
-	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
@@ -2942,12 +2938,12 @@
 	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, fence);
+	radeon_sync_free(rdev, &sync, fence);
 
 	return fence;
 }
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index aabc343..3a58b80 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -441,31 +441,27 @@
 				   unsigned num_gpu_pages,
 				   struct reservation_object *resv)
 {
-	struct radeon_semaphore *sem = NULL;
 	struct radeon_fence *fence;
+	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_dw, cur_size_in_dw;
 	int i, num_loops;
 	int r = 0;
 
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return ERR_PTR(r);
-	}
+	radeon_sync_create(&sync);
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
 	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
 	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_resv(rdev, sem, resv, false);
-	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
 		cur_size_in_dw = size_in_dw;
@@ -484,12 +480,12 @@
 	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, fence);
+	radeon_sync_free(rdev, &sync, fence);
 
 	return fence;
 }
diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c
index f6309bd..76c6a17 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@@ -811,6 +811,7 @@
 union fan_info {
 	struct _ATOM_PPLIB_FANTABLE fan;
 	struct _ATOM_PPLIB_FANTABLE2 fan2;
+	struct _ATOM_PPLIB_FANTABLE3 fan3;
 };
 
 static int r600_parse_clk_voltage_dep_table(struct radeon_clock_voltage_dependency_table *radeon_table,
@@ -900,6 +901,14 @@
 			else
 				rdev->pm.dpm.fan.t_max = 10900;
 			rdev->pm.dpm.fan.cycle_delay = 100000;
+			if (fan_info->fan.ucFanTableFormat >= 3) {
+				rdev->pm.dpm.fan.control_mode = fan_info->fan3.ucFanControlMode;
+				rdev->pm.dpm.fan.default_max_fan_pwm =
+					le16_to_cpu(fan_info->fan3.usFanPWMMax);
+				rdev->pm.dpm.fan.default_fan_output_sensitivity = 4836;
+				rdev->pm.dpm.fan.fan_output_sensitivity =
+					le16_to_cpu(fan_info->fan3.usFanOutputSensitivity);
+			}
 			rdev->pm.dpm.fan.ucode_fan_control = true;
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/r600_dpm.h b/drivers/gpu/drm/radeon/r600_dpm.h
index 46b9d2a..bd499d7 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.h
+++ b/drivers/gpu/drm/radeon/r600_dpm.h
@@ -96,6 +96,9 @@
 #define R600_TEMP_RANGE_MIN (90 * 1000)
 #define R600_TEMP_RANGE_MAX (120 * 1000)
 
+#define FDO_PWM_MODE_STATIC  1
+#define FDO_PWM_MODE_STATIC_RPM 5
+
 enum r600_power_level {
 	R600_POWER_LEVEL_LOW = 0,
 	R600_POWER_LEVEL_MEDIUM = 1,
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 1f61ff0..3207bb6 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -150,9 +150,6 @@
 /* number of hw syncs before falling back on blocking */
 #define RADEON_NUM_SYNCS			4
 
-/* number of hw syncs before falling back on blocking */
-#define RADEON_NUM_SYNCS			4
-
 /* hardcode those limit for now */
 #define RADEON_VA_IB_OFFSET			(1 << 20)
 #define RADEON_VA_RESERVED_SIZE			(8 << 20)
@@ -363,14 +360,15 @@
 };
 
 struct radeon_fence {
-	struct fence base;
+	struct fence		base;
 
-	struct radeon_device		*rdev;
-	uint64_t			seq;
+	struct radeon_device	*rdev;
+	uint64_t		seq;
 	/* RB, DMA, etc. */
-	unsigned			ring;
+	unsigned		ring;
+	bool			is_vm_update;
 
-	wait_queue_t			fence_wake;
+	wait_queue_t		fence_wake;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -458,6 +456,7 @@
 	struct list_head		bo_list;
 	uint32_t			flags;
 	uint64_t			addr;
+	struct radeon_fence		*last_pt_update;
 	unsigned			ref_count;
 
 	/* protected by vm mutex */
@@ -576,10 +575,9 @@
  * Semaphores.
  */
 struct radeon_semaphore {
-	struct radeon_sa_bo		*sa_bo;
-	signed				waiters;
-	uint64_t			gpu_addr;
-	struct radeon_fence		*sync_to[RADEON_NUM_RINGS];
+	struct radeon_sa_bo	*sa_bo;
+	signed			waiters;
+	uint64_t		gpu_addr;
 };
 
 int radeon_semaphore_create(struct radeon_device *rdev,
@@ -588,20 +586,33 @@
 				  struct radeon_semaphore *semaphore);
 bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
 				struct radeon_semaphore *semaphore);
-void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
-				 struct radeon_fence *fence);
-int radeon_semaphore_sync_resv(struct radeon_device *rdev,
-			       struct radeon_semaphore *semaphore,
-			       struct reservation_object *resv,
-			       bool shared);
-int radeon_semaphore_sync_rings(struct radeon_device *rdev,
-				struct radeon_semaphore *semaphore,
-				int waiting_ring);
 void radeon_semaphore_free(struct radeon_device *rdev,
 			   struct radeon_semaphore **semaphore,
 			   struct radeon_fence *fence);
 
 /*
+ * Synchronization
+ */
+struct radeon_sync {
+	struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS];
+	struct radeon_fence	*sync_to[RADEON_NUM_RINGS];
+	struct radeon_fence	*last_vm_update;
+};
+
+void radeon_sync_create(struct radeon_sync *sync);
+void radeon_sync_fence(struct radeon_sync *sync,
+		       struct radeon_fence *fence);
+int radeon_sync_resv(struct radeon_device *rdev,
+		     struct radeon_sync *sync,
+		     struct reservation_object *resv,
+		     bool shared);
+int radeon_sync_rings(struct radeon_device *rdev,
+		      struct radeon_sync *sync,
+		      int waiting_ring);
+void radeon_sync_free(struct radeon_device *rdev, struct radeon_sync *sync,
+		      struct radeon_fence *fence);
+
+/*
  * GART structures, functions & helpers
  */
 struct radeon_mc;
@@ -818,7 +829,7 @@
 	struct radeon_fence		*fence;
 	struct radeon_vm		*vm;
 	bool				is_const_ib;
-	struct radeon_semaphore		*semaphore;
+	struct radeon_sync		sync;
 };
 
 struct radeon_ring {
@@ -895,33 +906,37 @@
 	uint64_t			addr;
 };
 
+struct radeon_vm_id {
+	unsigned		id;
+	uint64_t		pd_gpu_addr;
+	/* last flushed PD/PT update */
+	struct radeon_fence	*flushed_updates;
+	/* last use of vmid */
+	struct radeon_fence	*last_id_use;
+};
+
 struct radeon_vm {
-	struct rb_root			va;
-	unsigned			id;
+	struct mutex		mutex;
+
+	struct rb_root		va;
 
 	/* BOs moved, but not yet updated in the PT */
-	struct list_head		invalidated;
+	struct list_head	invalidated;
 
 	/* BOs freed, but not yet updated in the PT */
-	struct list_head		freed;
+	struct list_head	freed;
 
 	/* contains the page directory */
-	struct radeon_bo		*page_directory;
-	uint64_t			pd_gpu_addr;
-	unsigned			max_pde_used;
+	struct radeon_bo	*page_directory;
+	unsigned		max_pde_used;
 
 	/* array of page tables, one for each page directory entry */
-	struct radeon_vm_pt		*page_tables;
+	struct radeon_vm_pt	*page_tables;
 
-	struct radeon_bo_va		*ib_bo_va;
+	struct radeon_bo_va	*ib_bo_va;
 
-	struct mutex			mutex;
-	/* last fence for cs using this vm */
-	struct radeon_fence		*fence;
-	/* last flush or NULL if we still need to flush */
-	struct radeon_fence		*last_flush;
-	/* last use of vmid */
-	struct radeon_fence		*last_id_use;
+	/* for id and flush management per ring */
+	struct radeon_vm_id	ids[RADEON_NUM_RINGS];
 };
 
 struct radeon_vm_manager {
@@ -1494,6 +1509,10 @@
 	u8 t_hyst;
 	u32 cycle_delay;
 	u16 t_max;
+	u8 control_mode;
+	u16 default_max_fan_pwm;
+	u16 default_fan_output_sensitivity;
+	u16 fan_output_sensitivity;
 	bool ucode_fan_control;
 };
 
@@ -1794,7 +1813,8 @@
 	void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring);
 	bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
 			       struct radeon_semaphore *semaphore, bool emit_wait);
-	void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+	void (*vm_flush)(struct radeon_device *rdev, struct radeon_ring *ring,
+			 unsigned vm_id, uint64_t pd_addr);
 
 	/* testing functions */
 	int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
@@ -2846,7 +2866,7 @@
 #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib))
 #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib))
 #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp))
-#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm))
+#define radeon_ring_vm_flush(rdev, r, vm_id, pd_addr) (rdev)->asic->ring[(r)->idx]->vm_flush((rdev), (r), (vm_id), (pd_addr))
 #define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r))
 #define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r))
 #define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r))
@@ -2962,7 +2982,7 @@
 				       struct radeon_vm *vm, int ring);
 void radeon_vm_flush(struct radeon_device *rdev,
                      struct radeon_vm *vm,
-                     int ring);
+		     int ring, struct radeon_fence *fence);
 void radeon_vm_fence(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     struct radeon_fence *fence);
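
The struct radeon_vm_id array above is the core of the multi-ring work:
VMID, page-directory address and flush state are now tracked per ring
instead of once per VM, so two rings can use the same VM under different
IDs without serializing on each other. A sketch of how that state feeds
the reworked vm_flush hook (the real logic lives in radeon_vm.c, which is
not part of this excerpt; field names follow the hunk above):

	struct radeon_vm_id *vm_id = &vm->ids[ring->idx];

	/* flush only when the page directory changed on this ring */
	if (pd_addr != vm_id->pd_gpu_addr) {
		vm_id->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, ring, vm_id->id, pd_addr);
	}

Correspondingly, the engine code (cik.c, ni.c and the DMA variants above)
derives an IB's VMID from ib->vm->ids[ib->ring].id instead of a single
vm->id.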
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index d8ace5b..2a45d54 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -599,7 +599,8 @@
 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int cayman_vm_init(struct radeon_device *rdev);
 void cayman_vm_fini(struct radeon_device *rdev);
-void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		     unsigned vm_id, uint64_t pd_addr);
 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags);
 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -624,7 +625,8 @@
 			     uint32_t incr, uint32_t flags);
 void cayman_dma_vm_pad_ib(struct radeon_ib *ib);
 
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+			 unsigned vm_id, uint64_t pd_addr);
 
 u32 cayman_gfx_get_rptr(struct radeon_device *rdev,
 			struct radeon_ring *ring);
@@ -699,7 +701,8 @@
 int si_irq_process(struct radeon_device *rdev);
 int si_vm_init(struct radeon_device *rdev);
 void si_vm_fini(struct radeon_device *rdev);
-void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		 unsigned vm_id, uint64_t pd_addr);
 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
 				 uint64_t src_offset, uint64_t dst_offset,
@@ -721,7 +724,8 @@
 			 uint64_t addr, unsigned count,
 			 uint32_t incr, uint32_t flags);
 
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		     unsigned vm_id, uint64_t pd_addr);
 u32 si_get_xclk(struct radeon_device *rdev);
 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev);
 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
@@ -793,7 +797,8 @@
 int cik_irq_process(struct radeon_device *rdev);
 int cik_vm_init(struct radeon_device *rdev);
 void cik_vm_fini(struct radeon_device *rdev);
-void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		  unsigned vm_id, uint64_t pd_addr);
 
 void cik_sdma_vm_copy_pages(struct radeon_device *rdev,
 			    struct radeon_ib *ib,
@@ -811,7 +816,8 @@
 			   uint32_t incr, uint32_t flags);
 void cik_sdma_vm_pad_ib(struct radeon_ib *ib);
 
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		      unsigned vm_id, uint64_t pd_addr);
 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
 		     struct radeon_ring *ring);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index a3e7aed..75f22e5 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -260,8 +260,8 @@
 			continue;
 
 		resv = p->relocs[i].robj->tbo.resv;
-		r = radeon_semaphore_sync_resv(p->rdev, p->ib.semaphore, resv,
-					       p->relocs[i].tv.shared);
+		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
+				     p->relocs[i].tv.shared);
 
 		if (r)
 			break;
@@ -285,9 +285,7 @@
 	INIT_LIST_HEAD(&p->validated);
 	p->idx = 0;
 	p->ib.sa_bo = NULL;
-	p->ib.semaphore = NULL;
 	p->const_ib.sa_bo = NULL;
-	p->const_ib.semaphore = NULL;
 	p->chunk_ib_idx = -1;
 	p->chunk_relocs_idx = -1;
 	p->chunk_flags_idx = -1;
@@ -507,6 +505,9 @@
 	if (r)
 		return r;
 
+	radeon_sync_resv(p->rdev, &p->ib.sync, vm->page_directory->tbo.resv,
+			 true);
+
 	r = radeon_vm_clear_freed(rdev, vm);
 	if (r)
 		return r;
@@ -538,6 +539,8 @@
 		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
 		if (r)
 			return r;
+
+		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
 	}
 
 	return radeon_vm_clear_invalids(rdev, vm);
@@ -582,7 +585,6 @@
 			DRM_ERROR("Failed to sync rings: %i\n", r);
 		goto out;
 	}
-	radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence);
 
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
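
With VM fences no longer tracked implicitly, the CS path above pulls in
exactly the fences it depends on: the shared fences on the page directory
and each mapping's last_pt_update. A sketch of the producer side ("fence
BO_VAs manually"), assuming radeon_vm_bo_update() -- not part of this
excerpt -- stores the fence of the IB that rewrote the page tables:

	/* in radeon_vm_bo_update(), after committing the PT update IB: */
	radeon_fence_unref(&bo_va->last_pt_update);
	bo_va->last_pt_update = radeon_fence_ref(ib.fence);

Any later CS touching the mapping can then radeon_sync_fence() on it, as
done in the hunk above.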
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 9630e8d..85f38ee 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -117,106 +117,7 @@
 	}
 }
 
-static void radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj,
-			      uint64_t gpu_addr)
-{
-	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-	struct radeon_device *rdev = crtc->dev->dev_private;
-
-	if (ASIC_IS_DCE4(rdev)) {
-		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
-		       upper_32_bits(gpu_addr));
-		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
-		       gpu_addr & 0xffffffff);
-	} else if (ASIC_IS_AVIVO(rdev)) {
-		if (rdev->family >= CHIP_RV770) {
-			if (radeon_crtc->crtc_id)
-				WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
-			else
-				WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
-		}
-		WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
-		       gpu_addr & 0xffffffff);
-	} else {
-		radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr;
-		/* offset is from DISP(2)_BASE_ADDRESS */
-		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset);
-	}
-}
-
-int radeon_crtc_cursor_set(struct drm_crtc *crtc,
-			   struct drm_file *file_priv,
-			   uint32_t handle,
-			   uint32_t width,
-			   uint32_t height)
-{
-	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-	struct radeon_device *rdev = crtc->dev->dev_private;
-	struct drm_gem_object *obj;
-	struct radeon_bo *robj;
-	uint64_t gpu_addr;
-	int ret;
-
-	if (!handle) {
-		/* turn off cursor */
-		radeon_hide_cursor(crtc);
-		obj = NULL;
-		goto unpin;
-	}
-
-	if ((width > radeon_crtc->max_cursor_width) ||
-	    (height > radeon_crtc->max_cursor_height)) {
-		DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
-		return -EINVAL;
-	}
-
-	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
-	if (!obj) {
-		DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, radeon_crtc->crtc_id);
-		return -ENOENT;
-	}
-
-	robj = gem_to_radeon_bo(obj);
-	ret = radeon_bo_reserve(robj, false);
-	if (unlikely(ret != 0))
-		goto fail;
-	/* Only 27 bit offset for legacy cursor */
-	ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM,
-				       ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27,
-				       &gpu_addr);
-	radeon_bo_unreserve(robj);
-	if (ret)
-		goto fail;
-
-	radeon_crtc->cursor_width = width;
-	radeon_crtc->cursor_height = height;
-
-	radeon_lock_cursor(crtc, true);
-	radeon_set_cursor(crtc, obj, gpu_addr);
-	radeon_show_cursor(crtc);
-	radeon_lock_cursor(crtc, false);
-
-unpin:
-	if (radeon_crtc->cursor_bo) {
-		robj = gem_to_radeon_bo(radeon_crtc->cursor_bo);
-		ret = radeon_bo_reserve(robj, false);
-		if (likely(ret == 0)) {
-			radeon_bo_unpin(robj);
-			radeon_bo_unreserve(robj);
-		}
-		drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo);
-	}
-
-	radeon_crtc->cursor_bo = obj;
-	return 0;
-fail:
-	drm_gem_object_unreference_unlocked(obj);
-
-	return ret;
-}
-
-int radeon_crtc_cursor_move(struct drm_crtc *crtc,
-			    int x, int y)
+static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	struct radeon_device *rdev = crtc->dev->dev_private;
@@ -281,7 +182,6 @@
 		}
 	}
 
-	radeon_lock_cursor(crtc, true);
 	if (ASIC_IS_DCE4(rdev)) {
 		WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset, (x << 16) | y);
 		WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
@@ -308,7 +208,134 @@
 		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset +
 								      (yorigin * 256)));
 	}
-	radeon_lock_cursor(crtc, false);
+
+	radeon_crtc->cursor_x = x;
+	radeon_crtc->cursor_y = y;
 
 	return 0;
 }
+
+int radeon_crtc_cursor_move(struct drm_crtc *crtc,
+			    int x, int y)
+{
+	int ret;
+
+	radeon_lock_cursor(crtc, true);
+	ret = radeon_cursor_move_locked(crtc, x, y);
+	radeon_lock_cursor(crtc, false);
+
+	return ret;
+}
+
+static void radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj,
+			      uint64_t gpu_addr, int hot_x, int hot_y)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_device *rdev = crtc->dev->dev_private;
+
+	if (ASIC_IS_DCE4(rdev)) {
+		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
+		       upper_32_bits(gpu_addr));
+		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
+		       gpu_addr & 0xffffffff);
+	} else if (ASIC_IS_AVIVO(rdev)) {
+		if (rdev->family >= CHIP_RV770) {
+			if (radeon_crtc->crtc_id)
+				WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
+			else
+				WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr));
+		}
+		WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
+		       gpu_addr & 0xffffffff);
+	} else {
+		radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr;
+		/* offset is from DISP(2)_BASE_ADDRESS */
+		WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset);
+	}
+
+	if (hot_x != radeon_crtc->cursor_hot_x ||
+	    hot_y != radeon_crtc->cursor_hot_y) {
+		int x, y;
+
+		x = radeon_crtc->cursor_x + radeon_crtc->cursor_hot_x - hot_x;
+		y = radeon_crtc->cursor_y + radeon_crtc->cursor_hot_y - hot_y;
+
+		radeon_cursor_move_locked(crtc, x, y);
+
+		radeon_crtc->cursor_hot_x = hot_x;
+		radeon_crtc->cursor_hot_y = hot_y;
+	}
+}
+
+int radeon_crtc_cursor_set2(struct drm_crtc *crtc,
+			    struct drm_file *file_priv,
+			    uint32_t handle,
+			    uint32_t width,
+			    uint32_t height,
+			    int32_t hot_x,
+			    int32_t hot_y)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_device *rdev = crtc->dev->dev_private;
+	struct drm_gem_object *obj;
+	struct radeon_bo *robj;
+	uint64_t gpu_addr;
+	int ret;
+
+	if (!handle) {
+		/* turn off cursor */
+		radeon_hide_cursor(crtc);
+		obj = NULL;
+		goto unpin;
+	}
+
+	if ((width > radeon_crtc->max_cursor_width) ||
+	    (height > radeon_crtc->max_cursor_height)) {
+		DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+		return -EINVAL;
+	}
+
+	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, radeon_crtc->crtc_id);
+		return -ENOENT;
+	}
+
+	robj = gem_to_radeon_bo(obj);
+	ret = radeon_bo_reserve(robj, false);
+	if (unlikely(ret != 0))
+		goto fail;
+	/* Only 27 bit offset for legacy cursor */
+	ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM,
+				       ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27,
+				       &gpu_addr);
+	radeon_bo_unreserve(robj);
+	if (ret)
+		goto fail;
+
+	radeon_crtc->cursor_width = width;
+	radeon_crtc->cursor_height = height;
+
+	radeon_lock_cursor(crtc, true);
+	radeon_set_cursor(crtc, obj, gpu_addr, hot_x, hot_y);
+	radeon_show_cursor(crtc);
+	radeon_lock_cursor(crtc, false);
+
+unpin:
+	if (radeon_crtc->cursor_bo) {
+		robj = gem_to_radeon_bo(radeon_crtc->cursor_bo);
+		ret = radeon_bo_reserve(robj, false);
+		if (likely(ret == 0)) {
+			radeon_bo_unpin(robj);
+			radeon_bo_unreserve(robj);
+		}
+		drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo);
+	}
+
+	radeon_crtc->cursor_bo = obj;
+	return 0;
+fail:
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
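
[Note on the hunk above] The hot_x/hot_y handling in radeon_set_cursor() is what keeps the cursor from visibly jumping when userspace switches to an image with a different hotspot: the stored CRTC position is shifted by the old-minus-new hotspot delta before the new hotspot is remembered. A standalone sketch of that re-anchoring, with plain ints standing in for the radeon_crtc fields (hypothetical, for illustration only):

    #include <stdio.h>

    /* Hypothetical stand-ins for the radeon_crtc cursor state. */
    static int cursor_x = 100, cursor_y = 80;  /* last CRTC position */
    static int cursor_hot_x, cursor_hot_y;     /* last hotspot (0,0) */

    /* Mirrors the hot_x/hot_y block of radeon_set_cursor(): keep the
     * cursor tip on the same screen pixel when the hotspot changes. */
    static void update_hotspot(int hot_x, int hot_y)
    {
            if (hot_x != cursor_hot_x || hot_y != cursor_hot_y) {
                    cursor_x += cursor_hot_x - hot_x;
                    cursor_y += cursor_hot_y - hot_y;
                    cursor_hot_x = hot_x;
                    cursor_hot_y = hot_y;
            }
    }

    int main(void)
    {
            update_hotspot(4, 4);   /* new image with its tip at (4,4) */
            printf("%d,%d\n", cursor_x, cursor_y); /* prints 96,76 */
            return 0;
    }
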
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index f1b0fa1..1021169 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -635,7 +635,7 @@
 	return ret;
 }
 static const struct drm_crtc_funcs radeon_crtc_funcs = {
-	.cursor_set = radeon_crtc_cursor_set,
+	.cursor_set2 = radeon_crtc_cursor_set2,
 	.cursor_move = radeon_crtc_cursor_move,
 	.gamma_set = radeon_crtc_gamma_set,
 	.set_config = radeon_crtc_set_config,
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 9951670..d13d1b5 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -140,6 +140,7 @@
 	(*fence)->rdev = rdev;
 	(*fence)->seq = seq;
 	(*fence)->ring = ring;
+	(*fence)->is_vm_update = false;
 	fence_init(&(*fence)->base, &radeon_fence_ops,
 		   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
 	radeon_fence_ring_emit(rdev, ring, *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 429213b..12cfaea 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -535,6 +535,68 @@
 	return r;
 }
 
+/**
+ * radeon_gem_va_update_vm - update the bo_va in its VM
+ *
+ * @rdev: radeon_device pointer
+ * @bo_va: bo_va to update
+ *
+ * Update the bo_va directly after setting its address. Errors are not
+ * vital here, so they are not reported back to userspace.
+ */
+static void radeon_gem_va_update_vm(struct radeon_device *rdev,
+				    struct radeon_bo_va *bo_va)
+{
+	struct ttm_validate_buffer tv, *entry;
+	struct radeon_cs_reloc *vm_bos;
+	struct ww_acquire_ctx ticket;
+	struct list_head list;
+	unsigned domain;
+	int r;
+
+	INIT_LIST_HEAD(&list);
+
+	tv.bo = &bo_va->bo->tbo;
+	tv.shared = true;
+	list_add(&tv.head, &list);
+
+	vm_bos = radeon_vm_get_bos(rdev, bo_va->vm, &list);
+	if (!vm_bos)
+		return;
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, true);
+	if (r)
+		goto error_free;
+
+	list_for_each_entry(entry, &list, head) {
+		domain = radeon_mem_type_to_domain(entry->bo->mem.mem_type);
+		/* if anything is swapped out don't swap it in here,
+		   just abort and wait for the next CS */
+		if (domain == RADEON_GEM_DOMAIN_CPU)
+			goto error_unreserve;
+	}
+
+	mutex_lock(&bo_va->vm->mutex);
+	r = radeon_vm_clear_freed(rdev, bo_va->vm);
+	if (r)
+		goto error_unlock;
+
+	if (bo_va->it.start)
+		r = radeon_vm_bo_update(rdev, bo_va, &bo_va->bo->tbo.mem);
+
+error_unlock:
+	mutex_unlock(&bo_va->vm->mutex);
+
+error_unreserve:
+	ttm_eu_backoff_reservation(&ticket, &list);
+
+error_free:
+	drm_free_large(vm_bos);
+
+	if (r)
+		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
+}
+
 int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp)
 {
@@ -618,6 +680,7 @@
 		if (bo_va->it.start) {
 			args->operation = RADEON_VA_RESULT_VA_EXIST;
 			args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
+			radeon_bo_unreserve(rbo);
 			goto out;
 		}
 		r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
@@ -628,12 +691,13 @@
 	default:
 		break;
 	}
+	if (!r)
+		radeon_gem_va_update_vm(rdev, bo_va);
 	args->operation = RADEON_VA_RESULT_OK;
 	if (r) {
 		args->operation = RADEON_VA_RESULT_ERROR;
 	}
 out:
-	radeon_bo_unreserve(rbo);
 	drm_gem_object_unreference_unlocked(gobj);
 	return r;
 }
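
[Note on the hunk above] radeon_gem_va_update_vm() is deliberately best-effort: if anything in the VM is swapped out it bails and lets the next CS redo the update, and errors are only logged, never returned to userspace. The fall-through cleanup labels undo exactly the steps that succeeded. A compilable sketch of that label ordering, simplified to two of the three labels, with hypothetical helpers standing in for the TTM calls:

    #include <stdio.h>

    /* Hypothetical helpers standing in for ttm_eu_reserve_buffers(),
     * the VM mutex and the page-table update. */
    static int reserve(void)    { return 0; }
    static int update_vm(void)  { return -22; /* pretend -EINVAL */ }
    static void unreserve(void) { puts("backoff reservation"); }
    static void free_list(void) { puts("free vm_bos"); }

    /* Same fall-through ordering as radeon_gem_va_update_vm(): success
     * and failure both run every label below the point reached. */
    static void va_update(void)
    {
            int r = reserve();
            if (r)
                    goto error_free;

            r = update_vm();
            if (r)
                    goto error_unreserve;

    error_unreserve:
            unreserve();
    error_free:
            free_list();
            if (r)
                    fprintf(stderr, "Couldn't update BO_VA (%d)\n", r);
    }

    int main(void) { va_update(); return 0; }
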
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index 3f39fcc..c39ce1f 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -64,10 +64,7 @@
 		return r;
 	}
 
-	r = radeon_semaphore_create(rdev, &ib->semaphore);
-	if (r) {
-		return r;
-	}
+	radeon_sync_create(&ib->sync);
 
 	ib->ring = ring;
 	ib->fence = NULL;
@@ -96,7 +93,7 @@
  */
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-	radeon_semaphore_free(rdev, &ib->semaphore, ib->fence);
+	radeon_sync_free(rdev, &ib->sync, ib->fence);
 	radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence);
 	radeon_fence_unref(&ib->fence);
 }
@@ -145,11 +142,11 @@
 	if (ib->vm) {
 		struct radeon_fence *vm_id_fence;
 		vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
-		radeon_semaphore_sync_fence(ib->semaphore, vm_id_fence);
+		radeon_sync_fence(&ib->sync, vm_id_fence);
 	}
 
 	/* sync with other rings */
-	r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
+	r = radeon_sync_rings(rdev, &ib->sync, ib->ring);
 	if (r) {
 		dev_err(rdev->dev, "failed to sync rings (%d)\n", r);
 		radeon_ring_unlock_undo(rdev, ring);
@@ -157,11 +154,12 @@
 	}
 
 	if (ib->vm)
-		radeon_vm_flush(rdev, ib->vm, ib->ring);
+		radeon_vm_flush(rdev, ib->vm, ib->ring,
+				ib->sync.last_vm_update);
 
 	if (const_ib) {
 		radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
-		radeon_semaphore_free(rdev, &const_ib->semaphore, NULL);
+		radeon_sync_free(rdev, &const_ib->sync, NULL);
 	}
 	radeon_ring_ib_execute(rdev, ib->ring, ib);
 	r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
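
[Note on the hunk above] With struct radeon_sync embedded in the IB, the VM-related part of radeon_ib_schedule() reduces to three steps: collect the fence of the VMID's previous user, make the target ring wait on everything collected, then flush only if needed -- ib->sync.last_vm_update carries the newest page-table update so radeon_vm_flush() can skip redundant flushes. Recapping just those calls from the hunk above (not standalone code, these are the kernel helpers themselves):

    vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
    radeon_sync_fence(&ib->sync, vm_id_fence);        /* wait for the ID's last user */

    r = radeon_sync_rings(rdev, &ib->sync, ib->ring); /* emit semaphore waits */

    radeon_vm_flush(rdev, ib->vm, ib->ring,
                    ib->sync.last_vm_update);         /* flush only when stale */
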
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 6eb561d..f4dd26a 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -628,8 +628,6 @@
 						  RADEON_VA_IB_OFFSET,
 						  RADEON_VM_PAGE_READABLE |
 						  RADEON_VM_PAGE_SNOOPED);
-
-			radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
 			if (r) {
 				radeon_vm_fini(rdev, vm);
 				kfree(fpriv);
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 64eba7e..f3d87cd 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -321,6 +321,10 @@
 	uint32_t crtc_offset;
 	struct drm_gem_object *cursor_bo;
 	uint64_t cursor_addr;
+	int cursor_x;
+	int cursor_y;
+	int cursor_hot_x;
+	int cursor_hot_y;
 	int cursor_width;
 	int cursor_height;
 	int max_cursor_width;
@@ -805,11 +809,13 @@
 extern int radeon_crtc_do_set_base(struct drm_crtc *crtc,
 				   struct drm_framebuffer *fb,
 				   int x, int y, int atomic);
-extern int radeon_crtc_cursor_set(struct drm_crtc *crtc,
-				  struct drm_file *file_priv,
-				  uint32_t handle,
-				  uint32_t width,
-				  uint32_t height);
+extern int radeon_crtc_cursor_set2(struct drm_crtc *crtc,
+				   struct drm_file *file_priv,
+				   uint32_t handle,
+				   uint32_t width,
+				   uint32_t height,
+				   int32_t hot_x,
+				   int32_t hot_y);
 extern int radeon_crtc_cursor_move(struct drm_crtc *crtc,
 				   int x, int y);
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 76eedd6..87b00d9 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -821,3 +821,22 @@
 	ttm_bo_unreserve(&bo->tbo);
 	return r;
 }
+
+/**
+ * radeon_bo_fence - add fence to buffer object
+ *
+ * @bo: buffer object in question
+ * @fence: fence to add
+ * @shared: true if fence should be added shared
+ *
+ */
+void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
+                     bool shared)
+{
+	struct reservation_object *resv = bo->tbo.resv;
+
+	if (shared)
+		reservation_object_add_shared_fence(resv, &fence->base);
+	else
+		reservation_object_add_excl_fence(resv, &fence->base);
+}
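
[Note on the hunk above] radeon_bo_fence() is the manual replacement for ttm_eu_fence_buffer_objects() in paths that no longer go through the execbuf helpers; the caller must hold the BO reserved and decides per update whether the fence is attached shared or exclusive. The typical call-site pattern, exactly as used by radeon_vm_clear_bo() later in this series:

    ib.fence->is_vm_update = true;          /* mark for sync tracking */
    radeon_bo_fence(bo, ib.fence, false);   /* exclusive fence on the BO */
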
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 1b8ec79..3b0b377 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -155,6 +155,8 @@
 				  struct ttm_mem_reg *new_mem);
 extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
+extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
+			    bool shared);
 
 /*
  * sub allocation
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 6deb08f..e6ad54c 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -34,15 +34,14 @@
 int radeon_semaphore_create(struct radeon_device *rdev,
 			    struct radeon_semaphore **semaphore)
 {
-	uint64_t *cpu_addr;
-	int i, r;
+	int r;
 
 	*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
 	if (*semaphore == NULL) {
 		return -ENOMEM;
 	}
-	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo,
-			     8 * RADEON_NUM_SYNCS, 8);
+	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo,
+			     &(*semaphore)->sa_bo, 8, 8);
 	if (r) {
 		kfree(*semaphore);
 		*semaphore = NULL;
@@ -51,12 +50,7 @@
 	(*semaphore)->waiters = 0;
 	(*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
 
-	cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo);
-	for (i = 0; i < RADEON_NUM_SYNCS; ++i)
-		cpu_addr[i] = 0;
-
-	for (i = 0; i < RADEON_NUM_RINGS; ++i)
-		(*semaphore)->sync_to[i] = NULL;
+	*((uint64_t *)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
 
 	return 0;
 }
@@ -95,146 +89,6 @@
 	return false;
 }
 
-/**
- * radeon_semaphore_sync_fence - use the semaphore to sync to a fence
- *
- * @semaphore: semaphore object to add fence to
- * @fence: fence to sync to
- *
- * Sync to the fence using this semaphore object
- */
-void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
-				 struct radeon_fence *fence)
-{
-        struct radeon_fence *other;
-
-        if (!fence)
-                return;
-
-        other = semaphore->sync_to[fence->ring];
-        semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
-}
-
-/**
- * radeon_semaphore_sync_to - use the semaphore to sync to a reservation object
- *
- * @sema: semaphore object to add fence from reservation object to
- * @resv: reservation object with embedded fence
- * @shared: true if we should only sync to the exclusive fence
- *
- * Sync to the fence using this semaphore object
- */
-int radeon_semaphore_sync_resv(struct radeon_device *rdev,
-			       struct radeon_semaphore *sema,
-			       struct reservation_object *resv,
-			       bool shared)
-{
-	struct reservation_object_list *flist;
-	struct fence *f;
-	struct radeon_fence *fence;
-	unsigned i;
-	int r = 0;
-
-	/* always sync to the exclusive fence */
-	f = reservation_object_get_excl(resv);
-	fence = f ? to_radeon_fence(f) : NULL;
-	if (fence && fence->rdev == rdev)
-		radeon_semaphore_sync_fence(sema, fence);
-	else if (f)
-		r = fence_wait(f, true);
-
-	flist = reservation_object_get_list(resv);
-	if (shared || !flist || r)
-		return r;
-
-	for (i = 0; i < flist->shared_count; ++i) {
-		f = rcu_dereference_protected(flist->shared[i],
-					      reservation_object_held(resv));
-		fence = to_radeon_fence(f);
-		if (fence && fence->rdev == rdev)
-			radeon_semaphore_sync_fence(sema, fence);
-		else
-			r = fence_wait(f, true);
-
-		if (r)
-			break;
-	}
-	return r;
-}
-
-/**
- * radeon_semaphore_sync_rings - sync ring to all registered fences
- *
- * @rdev: radeon_device pointer
- * @semaphore: semaphore object to use for sync
- * @ring: ring that needs sync
- *
- * Ensure that all registered fences are signaled before letting
- * the ring continue. The caller must hold the ring lock.
- */
-int radeon_semaphore_sync_rings(struct radeon_device *rdev,
-				struct radeon_semaphore *semaphore,
-				int ring)
-{
-	unsigned count = 0;
-	int i, r;
-
-        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-		struct radeon_fence *fence = semaphore->sync_to[i];
-
-		/* check if we really need to sync */
-                if (!radeon_fence_need_sync(fence, ring))
-			continue;
-
-		/* prevent GPU deadlocks */
-		if (!rdev->ring[i].ready) {
-			dev_err(rdev->dev, "Syncing to a disabled ring!");
-			return -EINVAL;
-		}
-
-		if (++count > RADEON_NUM_SYNCS) {
-			/* not enough room, wait manually */
-			r = radeon_fence_wait(fence, false);
-			if (r)
-				return r;
-			continue;
-		}
-
-		/* allocate enough space for sync command */
-		r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
-		if (r) {
-			return r;
-		}
-
-		/* emit the signal semaphore */
-		if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
-			/* signaling wasn't successful wait manually */
-			radeon_ring_undo(&rdev->ring[i]);
-			r = radeon_fence_wait(fence, false);
-			if (r)
-				return r;
-			continue;
-		}
-
-		/* we assume caller has already allocated space on waiters ring */
-		if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
-			/* waiting wasn't successful wait manually */
-			radeon_ring_undo(&rdev->ring[i]);
-			r = radeon_fence_wait(fence, false);
-			if (r)
-				return r;
-			continue;
-		}
-
-		radeon_ring_commit(rdev, &rdev->ring[i], false);
-		radeon_fence_note_sync(fence, ring);
-
-		semaphore->gpu_addr += 8;
-	}
-
-	return 0;
-}
-
 void radeon_semaphore_free(struct radeon_device *rdev,
 			   struct radeon_semaphore **semaphore,
 			   struct radeon_fence *fence)
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
new file mode 100644
index 0000000..02ac8a1
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <christian.koenig@amd.com>
+ */
+
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_trace.h"
+
+/**
+ * radeon_sync_create - zero init sync object
+ *
+ * @sync: sync object to initialize
+ *
+ * Just clear the sync object for now.
+ */
+void radeon_sync_create(struct radeon_sync *sync)
+{
+	unsigned i;
+
+	for (i = 0; i < RADEON_NUM_SYNCS; ++i)
+		sync->semaphores[i] = NULL;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i)
+		sync->sync_to[i] = NULL;
+
+	sync->last_vm_update = NULL;
+}
+
+/**
+ * radeon_sync_fence - use the semaphore to sync to a fence
+ *
+ * @sync: sync object to add fence to
+ * @fence: fence to sync to
+ *
+ * Sync to the fence using the semaphore objects
+ */
+void radeon_sync_fence(struct radeon_sync *sync,
+		       struct radeon_fence *fence)
+{
+	struct radeon_fence *other;
+
+	if (!fence)
+		return;
+
+	other = sync->sync_to[fence->ring];
+	sync->sync_to[fence->ring] = radeon_fence_later(fence, other);
+
+	if (fence->is_vm_update) {
+		other = sync->last_vm_update;
+		sync->last_vm_update = radeon_fence_later(fence, other);
+	}
+}
+
+/**
+ * radeon_sync_resv - use the semaphores to sync to a reservation object
+ *
+ * @sync: sync object to add fences from reservation object to
+ * @resv: reservation object with embedded fence
+ * @shared: true if we should only sync to the exclusive fence
+ *
+ * Sync to the fence using the semaphore objects
+ */
+int radeon_sync_resv(struct radeon_device *rdev,
+		     struct radeon_sync *sync,
+		     struct reservation_object *resv,
+		     bool shared)
+{
+	struct reservation_object_list *flist;
+	struct fence *f;
+	struct radeon_fence *fence;
+	unsigned i;
+	int r = 0;
+
+	/* always sync to the exclusive fence */
+	f = reservation_object_get_excl(resv);
+	fence = f ? to_radeon_fence(f) : NULL;
+	if (fence && fence->rdev == rdev)
+		radeon_sync_fence(sync, fence);
+	else if (f)
+		r = fence_wait(f, true);
+
+	flist = reservation_object_get_list(resv);
+	if (shared || !flist || r)
+		return r;
+
+	for (i = 0; i < flist->shared_count; ++i) {
+		f = rcu_dereference_protected(flist->shared[i],
+					      reservation_object_held(resv));
+		fence = to_radeon_fence(f);
+		if (fence && fence->rdev == rdev)
+			radeon_sync_fence(sync, fence);
+		else
+			r = fence_wait(f, true);
+
+		if (r)
+			break;
+	}
+	return r;
+}
+
+/**
+ * radeon_sync_rings - sync ring to all registered fences
+ *
+ * @rdev: radeon_device pointer
+ * @sync: sync object to use
+ * @ring: ring that needs sync
+ *
+ * Ensure that all registered fences are signaled before letting
+ * the ring continue. The caller must hold the ring lock.
+ */
+int radeon_sync_rings(struct radeon_device *rdev,
+		      struct radeon_sync *sync,
+		      int ring)
+{
+	unsigned count = 0;
+	int i, r;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		struct radeon_fence *fence = sync->sync_to[i];
+		struct radeon_semaphore *semaphore;
+
+		/* check if we really need to sync */
+		if (!radeon_fence_need_sync(fence, ring))
+			continue;
+
+		/* prevent GPU deadlocks */
+		if (!rdev->ring[i].ready) {
+			dev_err(rdev->dev, "Syncing to a disabled ring!\n");
+			return -EINVAL;
+		}
+
+		if (count >= RADEON_NUM_SYNCS) {
+			/* not enough room, wait manually */
+			r = radeon_fence_wait(fence, false);
+			if (r)
+				return r;
+			continue;
+		}
+		r = radeon_semaphore_create(rdev, &semaphore);
+		if (r)
+			return r;
+
+		sync->semaphores[count++] = semaphore;
+
+		/* allocate enough space for sync command */
+		r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
+		if (r)
+			return r;
+
+		/* emit the signal semaphore */
+		if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
+			/* signaling wasn't successful, wait manually */
+			radeon_ring_undo(&rdev->ring[i]);
+			r = radeon_fence_wait(fence, false);
+			if (r)
+				return r;
+			continue;
+		}
+
+		/* we assume caller has already allocated space on waiters ring */
+		if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
+			/* waiting wasn't successful, wait manually */
+			radeon_ring_undo(&rdev->ring[i]);
+			r = radeon_fence_wait(fence, false);
+			if (r)
+				return r;
+			continue;
+		}
+
+		radeon_ring_commit(rdev, &rdev->ring[i], false);
+		radeon_fence_note_sync(fence, ring);
+	}
+
+	return 0;
+}
+
+/**
+ * radeon_sync_free - free the sync object
+ *
+ * @rdev: radeon_device pointer
+ * @sync: sync object to use
+ * @fence: fence to use for the free
+ *
+ * Free the sync object by freeing all semaphores in it.
+ */
+void radeon_sync_free(struct radeon_device *rdev,
+		      struct radeon_sync *sync,
+		      struct radeon_fence *fence)
+{
+	unsigned i;
+
+	for (i = 0; i < RADEON_NUM_SYNCS; ++i)
+		radeon_semaphore_free(rdev, &sync->semaphores[i], fence);
+}
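
[Note on the new file above] The sync object separates "what do I wait for" from the semaphores used to express it: radeon_sync_fence() keeps only the latest fence per ring and additionally remembers the newest page-table update in last_vm_update. A small userspace analogue of that bookkeeping (struct layout and radeon_fence_later() behaviour are simplified assumptions):

    #include <stdio.h>

    #define NUM_RINGS 8

    /* Hypothetical analogues of struct radeon_fence / struct radeon_sync,
     * just enough to show the per-ring bookkeeping. */
    struct fence { unsigned ring; unsigned long long seq; int is_vm_update; };
    struct sync  { struct fence *sync_to[NUM_RINGS]; struct fence *last_vm_update; };

    static struct fence *later(struct fence *a, struct fence *b)
    {
            if (!a) return b;
            if (!b) return a;
            return a->seq > b->seq ? a : b; /* radeon_fence_later() analogue */
    }

    /* Mirrors radeon_sync_fence(): remember only the latest fence per
     * ring, and separately the latest page-table update. */
    static void sync_fence(struct sync *s, struct fence *f)
    {
            if (!f)
                    return;
            s->sync_to[f->ring] = later(f, s->sync_to[f->ring]);
            if (f->is_vm_update)
                    s->last_vm_update = later(f, s->last_vm_update);
    }

    int main(void)
    {
            struct sync s = {0};
            struct fence a = { .ring = 0, .seq = 10, .is_vm_update = 1 };
            struct fence b = { .ring = 0, .seq = 12, .is_vm_update = 0 };

            sync_fence(&s, &a);
            sync_fence(&s, &b);
            printf("ring0 waits for seq %llu, last VM update seq %llu\n",
                   s.sync_to[0]->seq, s.last_vm_update->seq); /* 12 and 10 */
            return 0;
    }
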
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index dfde266..0b10f3a 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -143,7 +143,7 @@
 	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
 	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 	list[0].tv.bo = &vm->page_directory->tbo;
-	list[0].tv.shared = false;
+	list[0].tv.shared = true;
 	list[0].tiling_flags = 0;
 	list[0].handle = 0;
 	list_add(&list[0].tv.head, head);
@@ -157,7 +157,7 @@
 		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
 		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 		list[idx].tv.bo = &list[idx].robj->tbo;
-		list[idx].tv.shared = false;
+		list[idx].tv.shared = true;
 		list[idx].tiling_flags = 0;
 		list[idx].handle = 0;
 		list_add(&list[idx++].tv.head, head);
@@ -182,15 +182,18 @@
 				       struct radeon_vm *vm, int ring)
 {
 	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
+	struct radeon_vm_id *vm_id = &vm->ids[ring];
+
 	unsigned choices[2] = {};
 	unsigned i;
 
 	/* check if the id is still valid */
-	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
+	if (vm_id->id && vm_id->last_id_use &&
+	    vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
 		return NULL;
 
 	/* we definitely need to flush */
-	radeon_fence_unref(&vm->last_flush);
+	vm_id->pd_gpu_addr = ~0ll;
 
 	/* skip over VMID 0, since it is the system VM */
 	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
@@ -198,8 +201,8 @@
 
 		if (fence == NULL) {
 			/* found a free one */
-			vm->id = i;
-			trace_radeon_vm_grab_id(vm->id, ring);
+			vm_id->id = i;
+			trace_radeon_vm_grab_id(i, ring);
 			return NULL;
 		}
 
@@ -211,8 +214,8 @@
 
 	for (i = 0; i < 2; ++i) {
 		if (choices[i]) {
-			vm->id = choices[i];
-			trace_radeon_vm_grab_id(vm->id, ring);
+			vm_id->id = choices[i];
+			trace_radeon_vm_grab_id(choices[i], ring);
 			return rdev->vm_manager.active[choices[i]];
 		}
 	}
@@ -228,6 +231,7 @@
  * @rdev: radeon_device pointer
  * @vm: vm we want to flush
  * @ring: ring to use for flush
+ * @updates: last vm update that is waited for
  *
  * Flush the vm (cayman+).
  *
@@ -235,15 +239,21 @@
  */
 void radeon_vm_flush(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
-		     int ring)
+		     int ring, struct radeon_fence *updates)
 {
 	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
+	struct radeon_vm_id *vm_id = &vm->ids[ring];
 
-	/* if we can't remember our last VM flush then flush now! */
-	if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
-		trace_radeon_vm_flush(pd_addr, ring, vm->id);
-		vm->pd_gpu_addr = pd_addr;
-		radeon_ring_vm_flush(rdev, ring, vm);
+	if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
+	    radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
+
+		trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
+		radeon_fence_unref(&vm_id->flushed_updates);
+		vm_id->flushed_updates = radeon_fence_ref(updates);
+		vm_id->pd_gpu_addr = pd_addr;
+		radeon_ring_vm_flush(rdev, &rdev->ring[ring],
+				     vm_id->id, vm_id->pd_gpu_addr);
+
 	}
 }
 
@@ -263,18 +273,13 @@
 		     struct radeon_vm *vm,
 		     struct radeon_fence *fence)
 {
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(fence);
+	unsigned vm_id = vm->ids[fence->ring].id;
 
-	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
-	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
+	radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
+	rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
 
-	radeon_fence_unref(&vm->last_id_use);
-	vm->last_id_use = radeon_fence_ref(fence);
-
-        /* we just flushed the VM, remember that */
-        if (!vm->last_flush)
-                vm->last_flush = radeon_fence_ref(fence);
+	radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
+	vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
 }
 
 /**
@@ -387,35 +392,25 @@
 static int radeon_vm_clear_bo(struct radeon_device *rdev,
 			      struct radeon_bo *bo)
 {
-        struct ttm_validate_buffer tv;
-        struct ww_acquire_ctx ticket;
-        struct list_head head;
 	struct radeon_ib ib;
 	unsigned entries;
 	uint64_t addr;
 	int r;
 
-        memset(&tv, 0, sizeof(tv));
-        tv.bo = &bo->tbo;
-	tv.shared = false;
-
-        INIT_LIST_HEAD(&head);
-        list_add(&tv.head, &head);
-
-        r = ttm_eu_reserve_buffers(&ticket, &head, true);
-        if (r)
+	r = radeon_bo_reserve(bo, false);
+	if (r)
 		return r;
 
-        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-        if (r)
-                goto error;
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+	if (r)
+		goto error_unreserve;
 
 	addr = radeon_bo_gpu_offset(bo);
 	entries = radeon_bo_size(bo) / 8;
 
 	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
 	if (r)
-                goto error;
+		goto error_unreserve;
 
 	ib.length_dw = 0;
 
@@ -425,15 +420,16 @@
 
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r)
-                goto error;
+		goto error_free;
 
-	ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base);
+	ib.fence->is_vm_update = true;
+	radeon_bo_fence(bo, ib.fence, false);
+
+error_free:
 	radeon_ib_free(rdev, &ib);
 
-	return 0;
-
-error:
-	ttm_eu_backoff_reservation(&ticket, &head);
+error_unreserve:
+	radeon_bo_unreserve(bo);
 	return r;
 }
 
@@ -449,7 +445,7 @@
  * Validate and set the offset requested within the vm address space.
  * Returns 0 for success, error for failure.
  *
- * Object has to be reserved!
+ * Object has to be reserved and gets unreserved by this function!
  */
 int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 			  struct radeon_bo_va *bo_va,
@@ -575,7 +571,7 @@
 	}
 
 	mutex_unlock(&vm->mutex);
-	return radeon_bo_reserve(bo_va->bo, false);
+	return 0;
 }
 
 /**
@@ -699,17 +695,15 @@
 	if (ib.length_dw != 0) {
 		radeon_asic_vm_pad_ib(rdev, &ib);
 
-		radeon_semaphore_sync_resv(rdev, ib.semaphore, pd->tbo.resv, false);
-		radeon_semaphore_sync_fence(ib.semaphore, vm->last_id_use);
+		radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true);
 		WARN_ON(ib.length_dw > ndw);
 		r = radeon_ib_schedule(rdev, &ib, NULL, false);
 		if (r) {
 			radeon_ib_free(rdev, &ib);
 			return r;
 		}
-		radeon_fence_unref(&vm->fence);
-		vm->fence = radeon_fence_ref(ib.fence);
-		radeon_fence_unref(&vm->last_flush);
+		ib.fence->is_vm_update = true;
+		radeon_bo_fence(pd, ib.fence, false);
 	}
 	radeon_ib_free(rdev, &ib);
 
@@ -826,7 +820,7 @@
 		unsigned nptes;
 		uint64_t pte;
 
-		radeon_semaphore_sync_resv(rdev, ib->semaphore, pt->tbo.resv, false);
+		radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true);
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
@@ -863,6 +857,31 @@
 }
 
 /**
+ * radeon_vm_fence_pts - fence page tables after an update
+ *
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @fence: fence to use
+ *
+ * Fence the page tables in the range @start - @end (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_fence_pts(struct radeon_vm *vm,
+				uint64_t start, uint64_t end,
+				struct radeon_fence *fence)
+{
+	unsigned i;
+
+	start >>= radeon_vm_block_size;
+	end >>= radeon_vm_block_size;
+
+	for (i = start; i <= end; ++i)
+		radeon_bo_fence(vm->page_tables[i].bo, fence, false);
+}
+
+/**
  * radeon_vm_bo_update - map a bo into the vm page table
  *
  * @rdev: radeon_device pointer
@@ -961,6 +980,13 @@
 		return r;
 	ib.length_dw = 0;
 
+	if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
+		unsigned i;
+
+		for (i = 0; i < RADEON_NUM_RINGS; ++i)
+			radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
+	}
+
 	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
 			      bo_va->it.last + 1, addr,
 			      radeon_vm_page_flags(bo_va->flags));
@@ -968,16 +994,16 @@
 	radeon_asic_vm_pad_ib(rdev, &ib);
 	WARN_ON(ib.length_dw > ndw);
 
-	radeon_semaphore_sync_fence(ib.semaphore, vm->fence);
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		return r;
 	}
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(ib.fence);
+	ib.fence->is_vm_update = true;
+	radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
+	radeon_fence_unref(&bo_va->last_pt_update);
+	bo_va->last_pt_update = radeon_fence_ref(ib.fence);
 	radeon_ib_free(rdev, &ib);
-	radeon_fence_unref(&vm->last_flush);
 
 	return 0;
 }
@@ -1002,6 +1028,7 @@
 	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
 		r = radeon_vm_bo_update(rdev, bo_va, NULL);
 		radeon_bo_unref(&bo_va->bo);
+		radeon_fence_unref(&bo_va->last_pt_update);
 		kfree(bo_va);
 		if (r)
 			return r;
@@ -1060,6 +1087,7 @@
 		bo_va->bo = radeon_bo_ref(bo_va->bo);
 		list_add(&bo_va->vm_status, &vm->freed);
 	} else {
+		radeon_fence_unref(&bo_va->last_pt_update);
 		kfree(bo_va);
 	}
 
@@ -1103,13 +1131,14 @@
 	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
 		RADEON_VM_PTE_COUNT * 8);
 	unsigned pd_size, pd_entries, pts_size;
-	int r;
+	int i, r;
 
-	vm->id = 0;
 	vm->ib_bo_va = NULL;
-	vm->fence = NULL;
-	vm->last_flush = NULL;
-	vm->last_id_use = NULL;
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		vm->ids[i].id = 0;
+		vm->ids[i].flushed_updates = NULL;
+		vm->ids[i].last_id_use = NULL;
+	}
 	mutex_init(&vm->mutex);
 	vm->va = RB_ROOT;
 	INIT_LIST_HEAD(&vm->invalidated);
@@ -1165,11 +1194,13 @@
 		if (!r) {
 			list_del_init(&bo_va->bo_list);
 			radeon_bo_unreserve(bo_va->bo);
+			radeon_fence_unref(&bo_va->last_pt_update);
 			kfree(bo_va);
 		}
 	}
 	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
 		radeon_bo_unref(&bo_va->bo);
+		radeon_fence_unref(&bo_va->last_pt_update);
 		kfree(bo_va);
 	}
 
@@ -1179,9 +1210,10 @@
 
 	radeon_bo_unref(&vm->page_directory);
 
-	radeon_fence_unref(&vm->fence);
-	radeon_fence_unref(&vm->last_flush);
-	radeon_fence_unref(&vm->last_id_use);
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		radeon_fence_unref(&vm->ids[i].flushed_updates);
+		radeon_fence_unref(&vm->ids[i].last_id_use);
+	}
 
 	mutex_destroy(&vm->mutex);
 }
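
[Note on the hunks above] The radeon_vm changes replace the single vm->id/fence/last_flush/last_id_use with one slot per ring, which is what lets every ring hold its own VMID. Judging purely from how the fields are used in this file (the matching struct radeon_vm_id definition lives in radeon.h and is not part of these hunks), the per-(vm, ring) state amounts to the following sketch:

    #include <stdint.h>

    struct radeon_fence;    /* kernel type, opaque here */

    /* Inferred per-(vm, ring) state; field names taken from their
     * uses above, the real definition is in radeon.h. */
    struct radeon_vm_id {
            unsigned            id;              /* hw VMID, 0 = none assigned   */
            uint64_t            pd_gpu_addr;     /* PD address covered by the    */
                                                 /* last flush; ~0ll forces one  */
            struct radeon_fence *flushed_updates; /* newest PT update flushed    */
            struct radeon_fence *last_id_use;    /* last fence using this VMID   */
    };
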
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
index 7f34bad..acff6e0 100644
--- a/drivers/gpu/drm/radeon/rv770_dma.c
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -44,31 +44,27 @@
 				    unsigned num_gpu_pages,
 				    struct reservation_object *resv)
 {
-	struct radeon_semaphore *sem = NULL;
 	struct radeon_fence *fence;
+	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_dw, cur_size_in_dw;
 	int i, num_loops;
 	int r = 0;
 
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return ERR_PTR(r);
-	}
+	radeon_sync_create(&sync);
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
 	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_resv(rdev, sem, resv, false);
-	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
 		cur_size_in_dw = size_in_dw;
@@ -87,12 +83,12 @@
 	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, fence);
+	radeon_sync_free(rdev, &sync, fence);
 
 	return fence;
 }
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index eeea5b6..14896ce 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -3362,6 +3362,7 @@
 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
 	u32 header;
 
 	if (ib->is_const_ib) {
@@ -3397,14 +3398,13 @@
 #endif
 			  (ib->gpu_addr & 0xFFFFFFFC));
 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
-	radeon_ring_write(ring, ib->length_dw |
-			  (ib->vm ? (ib->vm->id << 24) : 0));
+	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
 
 	if (!ib->is_const_ib) {
 		/* flush read cache over gart for this vmid */
 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
-		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
+		radeon_ring_write(ring, vm_id);
 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
 				  PACKET3_TC_ACTION_ENA |
@@ -5020,27 +5020,23 @@
 	       block, mc_id);
 }
 
-void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		 unsigned vm_id, uint64_t pd_addr)
 {
-	struct radeon_ring *ring = &rdev->ring[ridx];
-
-	if (vm == NULL)
-		return;
-
 	/* write new base address */
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
 				 WRITE_DATA_DST_SEL(0)));
 
-	if (vm->id < 8) {
+	if (vm_id < 8) {
 		radeon_ring_write(ring,
-				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
 	} else {
 		radeon_ring_write(ring,
-				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
 	}
 	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+	radeon_ring_write(ring, pd_addr >> 12);
 
 	/* flush hdp cache */
 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -5056,7 +5052,7 @@
 				 WRITE_DATA_DST_SEL(0)));
 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
 	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 1 << vm->id);
+	radeon_ring_write(ring, 1 << vm_id);
 
 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index b58f12b..f5cc777 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -185,20 +185,17 @@
 	}
 }
 
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
+		     unsigned vm_id, uint64_t pd_addr)
+
 {
-	struct radeon_ring *ring = &rdev->ring[ridx];
-
-	if (vm == NULL)
-		return;
-
 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
-	if (vm->id < 8) {
-		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+	if (vm_id < 8) {
+		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
 	} else {
-		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
+		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2));
 	}
-	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+	radeon_ring_write(ring, pd_addr >> 12);
 
 	/* flush hdp cache */
 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
@@ -208,7 +205,7 @@
 	/* bits 0-7 are the VM contexts0-7 */
 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
 	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
-	radeon_ring_write(ring, 1 << vm->id);
+	radeon_ring_write(ring, 1 << vm_id);
 }
 
 /**
@@ -229,31 +226,27 @@
 				 unsigned num_gpu_pages,
 				 struct reservation_object *resv)
 {
-	struct radeon_semaphore *sem = NULL;
 	struct radeon_fence *fence;
+	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_bytes, cur_size_in_bytes;
 	int i, num_loops;
 	int r = 0;
 
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return ERR_PTR(r);
-	}
+	radeon_sync_create(&sync);
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
 	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_resv(rdev, sem, resv, false);
-	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
+	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
 		cur_size_in_bytes = size_in_bytes;
@@ -272,12 +265,12 @@
 	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
-		radeon_semaphore_free(rdev, &sem, NULL);
+		radeon_sync_free(rdev, &sync, NULL);
 		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, fence);
+	radeon_sync_free(rdev, &sync, fence);
 
 	return fence;
 }
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index b59e1d6..cf4c420 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -3398,6 +3398,15 @@
 
 	ret = si_read_smc_sram_dword(rdev,
 				     SISLANDS_SMC_FIRMWARE_HEADER_LOCATION +
+				     SISLANDS_SMC_FIRMWARE_HEADER_fanTable,
+				     &tmp, si_pi->sram_end);
+	if (ret)
+		return ret;
+
+	si_pi->fan_table_start = tmp;
+
+	ret = si_read_smc_sram_dword(rdev,
+				     SISLANDS_SMC_FIRMWARE_HEADER_LOCATION +
 				     SISLANDS_SMC_FIRMWARE_HEADER_mcArbDramAutoRefreshTable,
 				     &tmp, si_pi->sram_end);
 	if (ret)
@@ -5825,20 +5834,20 @@
 	if (enable) {
 		PPSMC_Result result;
 
-		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
-		rdev->irq.dpm_thermal = true;
+		thermal_int &= ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
+		WREG32(CG_THERMAL_INT, thermal_int);
+		rdev->irq.dpm_thermal = false;
 		result = si_send_msg_to_smc(rdev, PPSMC_MSG_EnableThermalInterrupt);
 		if (result != PPSMC_Result_OK) {
 			DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
 			return -EINVAL;
 		}
 	} else {
-		thermal_int &= ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
-		rdev->irq.dpm_thermal = false;
+		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+		WREG32(CG_THERMAL_INT, thermal_int);
+		rdev->irq.dpm_thermal = true;
 	}
 
-	WREG32(CG_THERMAL_INT, thermal_int);
-
 	return 0;
 }
 
@@ -5867,6 +5876,309 @@
 	return 0;
 }
 
+static void si_fan_ctrl_set_static_mode(struct radeon_device *rdev, u32 mode)
+{
+	struct si_power_info *si_pi = si_get_pi(rdev);
+	u32 tmp;
+
+	if (si_pi->fan_ctrl_is_in_default_mode) {
+		tmp = (RREG32(CG_FDO_CTRL2) & FDO_PWM_MODE_MASK) >> FDO_PWM_MODE_SHIFT;
+		si_pi->fan_ctrl_default_mode = tmp;
+		tmp = (RREG32(CG_FDO_CTRL2) & TMIN_MASK) >> TMIN_SHIFT;
+		si_pi->t_min = tmp;
+		si_pi->fan_ctrl_is_in_default_mode = false;
+	}
+
+	tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK;
+	tmp |= TMIN(0);
+	WREG32(CG_FDO_CTRL2, tmp);
+
+	tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK;
+	tmp |= FDO_PWM_MODE(mode);
+	WREG32(CG_FDO_CTRL2, tmp);
+}
+
+static int si_thermal_setup_fan_table(struct radeon_device *rdev)
+{
+	struct si_power_info *si_pi = si_get_pi(rdev);
+	PP_SIslands_FanTable fan_table = { FDO_MODE_HARDWARE };
+	u32 duty100;
+	u32 t_diff1, t_diff2, pwm_diff1, pwm_diff2;
+	u16 fdo_min, slope1, slope2;
+	u32 reference_clock, tmp;
+	int ret;
+	u64 tmp64;
+
+	if (!si_pi->fan_table_start) {
+		rdev->pm.dpm.fan.ucode_fan_control = false;
+		return 0;
+	}
+
+	duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+
+	if (duty100 == 0) {
+		rdev->pm.dpm.fan.ucode_fan_control = false;
+		return 0;
+	}
+
+	tmp64 = (u64)rdev->pm.dpm.fan.pwm_min * duty100;
+	do_div(tmp64, 10000);
+	fdo_min = (u16)tmp64;
+
+	t_diff1 = rdev->pm.dpm.fan.t_med - rdev->pm.dpm.fan.t_min;
+	t_diff2 = rdev->pm.dpm.fan.t_high - rdev->pm.dpm.fan.t_med;
+
+	pwm_diff1 = rdev->pm.dpm.fan.pwm_med - rdev->pm.dpm.fan.pwm_min;
+	pwm_diff2 = rdev->pm.dpm.fan.pwm_high - rdev->pm.dpm.fan.pwm_med;
+
+	slope1 = (u16)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100);
+	slope2 = (u16)((50 + ((16 * duty100 * pwm_diff2) / t_diff2)) / 100);
+
+	fan_table.slope1 = cpu_to_be16(slope1);
+	fan_table.slope2 = cpu_to_be16(slope2);
+
+	fan_table.fdo_min = cpu_to_be16(fdo_min);
+
+	fan_table.hys_down = cpu_to_be16(rdev->pm.dpm.fan.t_hyst);
+
+	fan_table.hys_up = cpu_to_be16(1);
+
+	fan_table.hys_slope = cpu_to_be16(1);
+
+	fan_table.temp_resp_lim = cpu_to_be16(5);
+
+	reference_clock = radeon_get_xclk(rdev);
+
+	fan_table.refresh_period = cpu_to_be32((rdev->pm.dpm.fan.cycle_delay *
+						reference_clock) / 1600);
+
+	fan_table.fdo_max = cpu_to_be16((u16)duty100);
+
+	tmp = (RREG32(CG_MULT_THERMAL_CTRL) & TEMP_SEL_MASK) >> TEMP_SEL_SHIFT;
+	fan_table.temp_src = (uint8_t)tmp;
+
+	ret = si_copy_bytes_to_smc(rdev,
+				   si_pi->fan_table_start,
+				   (u8 *)(&fan_table),
+				   sizeof(fan_table),
+				   si_pi->sram_end);
+
+	if (ret) {
+		DRM_ERROR("Failed to load fan table to the SMC.\n");
+		rdev->pm.dpm.fan.ucode_fan_control = false;
+	}
+
+	return 0;
+}
+
+static int si_fan_ctrl_start_smc_fan_control(struct radeon_device *rdev)
+{
+	PPSMC_Result ret;
+
+	ret = si_send_msg_to_smc(rdev, PPSMC_StartFanControl);
+	if (ret == PPSMC_Result_OK)
+		return 0;
+	else
+		return -EINVAL;
+}
+
+static int si_fan_ctrl_stop_smc_fan_control(struct radeon_device *rdev)
+{
+	PPSMC_Result ret;
+
+	ret = si_send_msg_to_smc(rdev, PPSMC_StopFanControl);
+	if (ret == PPSMC_Result_OK)
+		return 0;
+	else
+		return -EINVAL;
+}
+
+#if 0
+static int si_fan_ctrl_get_fan_speed_percent(struct radeon_device *rdev,
+					     u32 *speed)
+{
+	u32 duty, duty100;
+	u64 tmp64;
+
+	if (rdev->pm.no_fan)
+		return -ENOENT;
+
+	duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+	duty = (RREG32(CG_THERMAL_STATUS) & FDO_PWM_DUTY_MASK) >> FDO_PWM_DUTY_SHIFT;
+
+	if (duty100 == 0)
+		return -EINVAL;
+
+	tmp64 = (u64)duty * 100;
+	do_div(tmp64, duty100);
+	*speed = (u32)tmp64;
+
+	if (*speed > 100)
+		*speed = 100;
+
+	return 0;
+}
+
+static int si_fan_ctrl_set_fan_speed_percent(struct radeon_device *rdev,
+					     u32 speed)
+{
+	u32 tmp;
+	u32 duty, duty100;
+	u64 tmp64;
+
+	if (rdev->pm.no_fan)
+		return -ENOENT;
+
+	if (speed > 100)
+		return -EINVAL;
+
+	if (rdev->pm.dpm.fan.ucode_fan_control)
+		si_fan_ctrl_stop_smc_fan_control(rdev);
+
+	duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT;
+
+	if (duty100 == 0)
+		return -EINVAL;
+
+	tmp64 = (u64)speed * duty100;
+	do_div(tmp64, 100);
+	duty = (u32)tmp64;
+
+	tmp = RREG32(CG_FDO_CTRL0) & ~FDO_STATIC_DUTY_MASK;
+	tmp |= FDO_STATIC_DUTY(duty);
+	WREG32(CG_FDO_CTRL0, tmp);
+
+	si_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+
+	return 0;
+}
+
+static int si_fan_ctrl_get_fan_speed_rpm(struct radeon_device *rdev,
+					 u32 *speed)
+{
+	u32 tach_period;
+	u32 xclk = radeon_get_xclk(rdev);
+
+	if (rdev->pm.no_fan)
+		return -ENOENT;
+
+	if (rdev->pm.fan_pulses_per_revolution == 0)
+		return -ENOENT;
+
+	tach_period = (RREG32(CG_TACH_STATUS) & TACH_PERIOD_MASK) >> TACH_PERIOD_SHIFT;
+	if (tach_period == 0)
+		return -ENOENT;
+
+	*speed = 60 * xclk * 10000 / tach_period;
+
+	return 0;
+}
+
+static int si_fan_ctrl_set_fan_speed_rpm(struct radeon_device *rdev,
+					 u32 speed)
+{
+	u32 tach_period, tmp;
+	u32 xclk = radeon_get_xclk(rdev);
+
+	if (rdev->pm.no_fan)
+		return -ENOENT;
+
+	if (rdev->pm.fan_pulses_per_revolution == 0)
+		return -ENOENT;
+
+	if ((speed < rdev->pm.fan_min_rpm) ||
+	    (speed > rdev->pm.fan_max_rpm))
+		return -EINVAL;
+
+	if (rdev->pm.dpm.fan.ucode_fan_control)
+		si_fan_ctrl_stop_smc_fan_control(rdev);
+
+	tach_period = 60 * xclk * 10000 / (8 * speed);
+	tmp = RREG32(CG_TACH_CTRL) & ~TARGET_PERIOD_MASK;
+	tmp |= TARGET_PERIOD(tach_period);
+	WREG32(CG_TACH_CTRL, tmp);
+
+	si_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+
+	return 0;
+}
+#endif
+
+static void si_fan_ctrl_set_default_mode(struct radeon_device *rdev)
+{
+	struct si_power_info *si_pi = si_get_pi(rdev);
+	u32 tmp;
+
+	if (!si_pi->fan_ctrl_is_in_default_mode) {
+		tmp = RREG32(CG_FDO_CTRL2) & ~FDO_PWM_MODE_MASK;
+		tmp |= FDO_PWM_MODE(si_pi->fan_ctrl_default_mode);
+		WREG32(CG_FDO_CTRL2, tmp);
+
+		tmp = RREG32(CG_FDO_CTRL2) & ~TMIN_MASK;
+		tmp |= TMIN(si_pi->t_min);
+		WREG32(CG_FDO_CTRL2, tmp);
+		si_pi->fan_ctrl_is_in_default_mode = true;
+	}
+}
+
+static void si_thermal_start_smc_fan_control(struct radeon_device *rdev)
+{
+	if (rdev->pm.dpm.fan.ucode_fan_control) {
+		si_fan_ctrl_start_smc_fan_control(rdev);
+		si_fan_ctrl_set_static_mode(rdev, FDO_PWM_MODE_STATIC);
+	}
+}
+
+static void si_thermal_initialize(struct radeon_device *rdev)
+{
+	u32 tmp;
+
+	if (rdev->pm.fan_pulses_per_revolution) {
+		tmp = RREG32(CG_TACH_CTRL) & ~EDGE_PER_REV_MASK;
+		tmp |= EDGE_PER_REV(rdev->pm.fan_pulses_per_revolution - 1);
+		WREG32(CG_TACH_CTRL, tmp);
+	}
+
+	tmp = RREG32(CG_FDO_CTRL2) & ~TACH_PWM_RESP_RATE_MASK;
+	tmp |= TACH_PWM_RESP_RATE(0x28);
+	WREG32(CG_FDO_CTRL2, tmp);
+}
+
+static int si_thermal_start_thermal_controller(struct radeon_device *rdev)
+{
+	int ret;
+
+	si_thermal_initialize(rdev);
+	ret = si_thermal_set_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+	if (ret)
+		return ret;
+	ret = si_thermal_enable_alert(rdev, true);
+	if (ret)
+		return ret;
+	if (rdev->pm.dpm.fan.ucode_fan_control) {
+		ret = si_halt_smc(rdev);
+		if (ret)
+			return ret;
+		ret = si_thermal_setup_fan_table(rdev);
+		if (ret)
+			return ret;
+		ret = si_resume_smc(rdev);
+		if (ret)
+			return ret;
+		si_thermal_start_smc_fan_control(rdev);
+	}
+
+	return 0;
+}
+
+static void si_thermal_stop_thermal_controller(struct radeon_device *rdev)
+{
+	if (!rdev->pm.no_fan) {
+		si_fan_ctrl_set_default_mode(rdev);
+		si_fan_ctrl_stop_smc_fan_control(rdev);
+	}
+}
+
 int si_dpm_enable(struct radeon_device *rdev)
 {
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
@@ -5979,6 +6291,8 @@
 
 	si_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, true);
 
+	si_thermal_start_thermal_controller(rdev);
+
 	ni_update_current_ps(rdev, boot_ps);
 
 	return 0;
@@ -6019,6 +6333,7 @@
 
 	if (!si_is_smc_running(rdev))
 		return;
+	si_thermal_stop_thermal_controller(rdev);
 	si_disable_ulv(rdev);
 	si_clear_vc(rdev);
 	if (pi->thermal_protection)
@@ -6557,6 +6872,9 @@
 		rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc =
 			rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
 
+	si_pi->fan_ctrl_is_in_default_mode = true;
+	rdev->pm.dpm.fan.ucode_fan_control = false;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/si_dpm.h b/drivers/gpu/drm/radeon/si_dpm.h
index 8b5c06a..d16bb1b 100644
--- a/drivers/gpu/drm/radeon/si_dpm.h
+++ b/drivers/gpu/drm/radeon/si_dpm.h
@@ -182,6 +182,7 @@
 	u32 dte_table_start;
 	u32 spll_table_start;
 	u32 papm_cfg_table_start;
+	u32 fan_table_start;
 	/* CAC stuff */
 	const struct si_cac_config_reg *cac_weights;
 	const struct si_cac_config_reg *lcac_config;
@@ -197,6 +198,10 @@
 	/* SVI2 */
 	u8 svd_gpio_id;
 	u8 svc_gpio_id;
+	/* fan control */
+	bool fan_ctrl_is_in_default_mode;
+	u32 t_min;
+	u32 fan_ctrl_default_mode;
 };
 
 #define SISLANDS_INITIAL_STATE_ARB_INDEX    0
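
[Note on the si_dpm changes above] si_pi->fan_table_start is where si_thermal_setup_fan_table() uploads the computed table. That function turns the BIOS fan profile into two fixed-point PWM-vs-temperature slopes around t_med, scaled by the controller's duty100, with the +50 rounding the /100 division to nearest. A standalone recomputation with made-up inputs (the 0.01% / 0.01 degree units are an assumption read off the do_div(..., 10000) for fdo_min):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* hypothetical inputs; pwm_* in 0.01% units, t_* in 0.01 C */
            uint32_t duty100 = 255;                  /* FMAX_DUTY100 from CG_FDO_CTRL1 */
            uint32_t pwm_min = 3000, pwm_med = 6000; /* 30% and 60% */
            uint32_t t_min = 4500, t_med = 6500;     /* 45 C and 65 C */

            uint32_t t_diff1   = t_med - t_min;
            uint32_t pwm_diff1 = pwm_med - pwm_min;

            /* slope1 as computed in si_thermal_setup_fan_table() */
            uint16_t slope1  = (uint16_t)((50 + ((16 * duty100 * pwm_diff1) / t_diff1)) / 100);
            uint16_t fdo_min = (uint16_t)(((uint64_t)pwm_min * duty100) / 10000);

            printf("slope1=%u fdo_min=%u\n", slope1, fdo_min); /* 61 and 76 */
            return 0;
    }
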
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 6635da9..c549c16a 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -180,7 +180,10 @@
 #define		DIG_THERM_DPM(x)			((x) << 14)
 #define		DIG_THERM_DPM_MASK			0x003FC000
 #define		DIG_THERM_DPM_SHIFT			14
-
+#define	CG_THERMAL_STATUS				0x704
+#define		FDO_PWM_DUTY(x)				((x) << 9)
+#define		FDO_PWM_DUTY_MASK			(0xff << 9)
+#define		FDO_PWM_DUTY_SHIFT			9
 #define	CG_THERMAL_INT					0x708
 #define		DIG_THERM_INTH(x)			((x) << 8)
 #define		DIG_THERM_INTH_MASK			0x0000FF00
@@ -191,6 +194,10 @@
 #define 	THERM_INT_MASK_HIGH			(1 << 24)
 #define 	THERM_INT_MASK_LOW			(1 << 25)
 
+#define	CG_MULT_THERMAL_CTRL					0x710
+#define		TEMP_SEL(x)					((x) << 20)
+#define		TEMP_SEL_MASK					(0xff << 20)
+#define		TEMP_SEL_SHIFT					20
 #define	CG_MULT_THERMAL_STATUS					0x714
 #define		ASIC_MAX_TEMP(x)				((x) << 0)
 #define		ASIC_MAX_TEMP_MASK				0x000001ff
@@ -199,6 +206,37 @@
 #define		CTF_TEMP_MASK					0x0003fe00
 #define		CTF_TEMP_SHIFT					9
 
+#define	CG_FDO_CTRL0					0x754
+#define		FDO_STATIC_DUTY(x)			((x) << 0)
+#define		FDO_STATIC_DUTY_MASK			0x0000000F
+#define		FDO_STATIC_DUTY_SHIFT			0
+#define	CG_FDO_CTRL1					0x758
+#define		FMAX_DUTY100(x)				((x) << 0)
+#define		FMAX_DUTY100_MASK			0x0000000F
+#define		FMAX_DUTY100_SHIFT			0
+#define	CG_FDO_CTRL2					0x75C
+#define		TMIN(x)					((x) << 0)
+#define		TMIN_MASK				0x0000000F
+#define		TMIN_SHIFT				0
+#define		FDO_PWM_MODE(x)				((x) << 11)
+#define		FDO_PWM_MODE_MASK			(3 << 11)
+#define		FDO_PWM_MODE_SHIFT			11
+#define		TACH_PWM_RESP_RATE(x)			((x) << 25)
+#define		TACH_PWM_RESP_RATE_MASK			(0x7f << 25)
+#define		TACH_PWM_RESP_RATE_SHIFT		25
+
+#define CG_TACH_CTRL                                    0x770
+#       define EDGE_PER_REV(x)                          ((x) << 0)
+#       define EDGE_PER_REV_MASK                        (0x7 << 0)
+#       define EDGE_PER_REV_SHIFT                       0
+#       define TARGET_PERIOD(x)                         ((x) << 3)
+#       define TARGET_PERIOD_MASK                       0xfffffff8
+#       define TARGET_PERIOD_SHIFT                      3
+#define CG_TACH_STATUS                                  0x774
+#       define TACH_PERIOD(x)                           ((x) << 0)
+#       define TACH_PERIOD_MASK                         0xffffffff
+#       define TACH_PERIOD_SHIFT                        0
+
 #define GENERAL_PWRMGT                                  0x780
 #       define GLOBAL_PWRMGT_EN                         (1 << 0)
 #       define STATIC_PM_EN                             (1 << 1)
diff --git a/drivers/gpu/drm/radeon/sislands_smc.h b/drivers/gpu/drm/radeon/sislands_smc.h
index 623a0b1..3c77983 100644
--- a/drivers/gpu/drm/radeon/sislands_smc.h
+++ b/drivers/gpu/drm/radeon/sislands_smc.h
@@ -245,6 +245,31 @@
 #define SI_SMC_SOFT_REGISTER_svi_rework_gpio_id_svd   0x11c
 #define SI_SMC_SOFT_REGISTER_svi_rework_gpio_id_svc   0x120
 
+struct PP_SIslands_FanTable
+{
+	uint8_t  fdo_mode;
+	uint8_t  padding;
+	int16_t  temp_min;
+	int16_t  temp_med;
+	int16_t  temp_max;
+	int16_t  slope1;
+	int16_t  slope2;
+	int16_t  fdo_min;
+	int16_t  hys_up;
+	int16_t  hys_down;
+	int16_t  hys_slope;
+	int16_t  temp_resp_lim;
+	int16_t  temp_curr;
+	int16_t  slope_curr;
+	int16_t  pwm_curr;
+	uint32_t refresh_period;
+	int16_t  fdo_max;
+	uint8_t  temp_src;
+	int8_t  padding2;
+};
+
+typedef struct PP_SIslands_FanTable PP_SIslands_FanTable;
+
 #define SMC_SISLANDS_LKGE_LUT_NUM_OF_TEMP_ENTRIES 16
 #define SMC_SISLANDS_LKGE_LUT_NUM_OF_VOLT_ENTRIES 32
 
diff --git a/drivers/gpu/drm/radeon/smu7_discrete.h b/drivers/gpu/drm/radeon/smu7_discrete.h
index 82f70c9..0b0b404 100644
--- a/drivers/gpu/drm/radeon/smu7_discrete.h
+++ b/drivers/gpu/drm/radeon/smu7_discrete.h
@@ -431,6 +431,31 @@
 
 typedef struct SMU7_Discrete_MCRegisters SMU7_Discrete_MCRegisters;
 
+struct SMU7_Discrete_FanTable
+{
+	uint16_t FdoMode;
+	int16_t  TempMin;
+	int16_t  TempMed;
+	int16_t  TempMax;
+	int16_t  Slope1;
+	int16_t  Slope2;
+	int16_t  FdoMin;
+	int16_t  HystUp;
+	int16_t  HystDown;
+	int16_t  HystSlope;
+	int16_t  TempRespLim;
+	int16_t  TempCurr;
+	int16_t  SlopeCurr;
+	int16_t  PwmCurr;
+	uint32_t RefreshPeriod;
+	int16_t  FdoMax;
+	uint8_t  TempSrc;
+	int8_t   Padding;
+};
+
+typedef struct SMU7_Discrete_FanTable SMU7_Discrete_FanTable;
+
+
 struct SMU7_Discrete_PmFuses {
   // dw0-dw1
   uint8_t BapmVddCVidHiSidd[8];
@@ -462,7 +487,10 @@
   uint8_t BapmVddCVidHiSidd2[8];
 
   // dw11-dw12
-  uint32_t Reserved6[2];
+  int16_t FuzzyFan_ErrorSetDelta;
+  int16_t FuzzyFan_ErrorRateSetDelta;
+  int16_t FuzzyFan_PwmSetDelta;
+  uint16_t CalcMeasPowerBlend;
 
   // dw13-dw16
   uint8_t GnbLPML[16];
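
[Note on the fan table structs] Both fan tables (PP_SIslands_FanTable for SI and SMU7_Discrete_FanTable for CI) are consumed by the SMC in big-endian layout, which is why the setup code runs every multi-byte field through cpu_to_be16()/cpu_to_be32() before copying the struct into SMC RAM. A one-line userspace illustration, using htons() as a stand-in for cpu_to_be16():

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>  /* htons() as a stand-in for cpu_to_be16() */

    int main(void)
    {
            uint16_t slope1 = 61;           /* host-order value           */
            uint16_t wire = htons(slope1);  /* byte order the SMC expects */
            printf("0x%04x\n", wire);       /* 0x3d00 on little-endian    */
            return 0;
    }
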