drm/radeon/kms: add dynamic engine reclocking (V9)

V2: reorganize functions, fix modesetting calls
V3: rebase patch, use radeon's workqueue
V4: enable on tested chipsets only, request VBLANK IRQs
V5: enable PM on older hardware (IRQs, mode_fixup, dpms)
V6: use separate dynpm module parameter
V7: drop RADEON_ prefix, set minimum mode for dpms off
V8: update legacy encoder call, fix order in rs600 IRQ
V9: update compute_clocks call in legacy, not only DPMS_OFF

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 597f85b..05502bf 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -312,9 +312,13 @@
 		/* Vertical blank interrupts */
 		if (status & RADEON_CRTC_VBLANK_STAT) {
 			drm_handle_vblank(rdev->ddev, 0);
+			if (rdev->pm.vblank_callback)
+				queue_work(rdev->wq, &rdev->pm.reclock_work);
 		}
 		if (status & RADEON_CRTC2_VBLANK_STAT) {
 			drm_handle_vblank(rdev->ddev, 1);
+			if (rdev->pm.vblank_callback)
+				queue_work(rdev->wq, &rdev->pm.reclock_work);
 		}
 		if (status & RADEON_FP_DETECT_STAT) {
 			queue_hotplug = true;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 7c32a23..65daf55 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2744,6 +2744,8 @@
 			case 0: /* D1 vblank */
 				if (disp_int & LB_D1_VBLANK_INTERRUPT) {
 					drm_handle_vblank(rdev->ddev, 0);
+					if (rdev->pm.vblank_callback)
+						queue_work(rdev->wq, &rdev->pm.reclock_work);
 					disp_int &= ~LB_D1_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D1 vblank\n");
 				}
@@ -2764,6 +2766,8 @@
 			case 0: /* D2 vblank */
 				if (disp_int & LB_D2_VBLANK_INTERRUPT) {
 					drm_handle_vblank(rdev->ddev, 1);
+					if (rdev->pm.vblank_callback)
+						queue_work(rdev->wq, &rdev->pm.reclock_work);
 					disp_int &= ~LB_D2_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D2 vblank\n");
 				}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 29ae383..d7da6e7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -89,6 +89,7 @@
 extern int radeon_connector_table;
 extern int radeon_tv;
 extern int radeon_new_pll;
+extern int radeon_dynpm;
 extern int radeon_audio;
 
 /*
@@ -148,6 +149,7 @@
  * Power management
  */
 int radeon_pm_init(struct radeon_device *rdev);
+void radeon_pm_compute_clocks(struct radeon_device *rdev);
 
 /*
  * Fences.
@@ -569,7 +571,33 @@
  * Equation between gpu/memory clock and available bandwidth is hw dependent
  * (type of memory, bus size, efficiency, ...)
  */
+enum radeon_pm_state {
+	PM_STATE_DISABLED,
+	PM_STATE_MINIMUM,
+	PM_STATE_PAUSED,
+	PM_STATE_ACTIVE
+};
+enum radeon_pm_action {
+	PM_ACTION_NONE,
+	PM_ACTION_MINIMUM,
+	PM_ACTION_DOWNCLOCK,
+	PM_ACTION_UPCLOCK
+};
 struct radeon_pm {
+	struct mutex		mutex;
+	struct work_struct	reclock_work;
+	struct delayed_work	idle_work;
+	enum radeon_pm_state	state;
+	enum radeon_pm_action	planned_action;
+	unsigned long		action_timeout;
+	bool 			downclocked;
+	bool			vblank_callback;
+	int			active_crtcs;
+	int			req_vblank;
+	uint32_t		min_gpu_engine_clock;
+	uint32_t		min_gpu_memory_clock;
+	uint32_t		min_mode_engine_clock;
+	uint32_t		min_mode_memory_clock;
 	fixed20_12		max_bandwidth;
 	fixed20_12		igp_sideport_mclk;
 	fixed20_12		igp_system_mclk;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 18ac29c..a1c937d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -642,6 +642,7 @@
 	if (rdev->family >= CHIP_R600)
 		spin_lock_init(&rdev->ih.lock);
 	mutex_init(&rdev->gem.mutex);
+	mutex_init(&rdev->pm.mutex);
 	rwlock_init(&rdev->fence_drv.lock);
 	INIT_LIST_HEAD(&rdev->gem.objects);
 
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 8ba3de7..a9572e6 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -87,6 +87,7 @@
 int radeon_connector_table = 0;
 int radeon_tv = 1;
 int radeon_new_pll = 1;
+int radeon_dynpm = -1;
 int radeon_audio = 1;
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
@@ -125,6 +126,9 @@
 MODULE_PARM_DESC(new_pll, "Select new PLL code for AVIVO chips");
 module_param_named(new_pll, radeon_new_pll, int, 0444);
 
+MODULE_PARM_DESC(dynpm, "Disable/Enable dynamic power management (1 = enable)");
+module_param_named(dynpm, radeon_dynpm, int, 0444);
+
 MODULE_PARM_DESC(audio, "Audio enable (0 = disable)");
 module_param_named(audio, radeon_audio, int, 0444);
 
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index c5ba1e3..f7d6078 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -262,6 +262,9 @@
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 
+	/* adjust pm to upcoming mode change */
+	radeon_pm_compute_clocks(rdev);
+
 	/* set the active encoder to connector routing */
 	radeon_encoder_set_active_device(encoder);
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
@@ -1013,6 +1016,9 @@
 		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 	}
 	radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 union crtc_source_param {
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
index 38e45e2..cf389ce 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -115,6 +115,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_lvds_prepare(struct drm_encoder *encoder)
@@ -214,6 +217,11 @@
 				     struct drm_display_mode *adjusted_mode)
 {
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	/* adjust pm to upcoming mode change */
+	radeon_pm_compute_clocks(rdev);
 
 	/* set the active encoder to connector routing */
 	radeon_encoder_set_active_device(encoder);
@@ -285,6 +293,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_primary_dac_prepare(struct drm_encoder *encoder)
@@ -470,6 +481,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_tmds_int_prepare(struct drm_encoder *encoder)
@@ -635,6 +649,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_tmds_ext_prepare(struct drm_encoder *encoder)
@@ -842,6 +859,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_tv_dac_prepare(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 8bce64c..a9c61f4 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -22,17 +22,253 @@
 #include "drmP.h"
 #include "radeon.h"
 
-int radeon_debugfs_pm_init(struct radeon_device *rdev);
+#define RADEON_IDLE_LOOP_MS 100
+#define RADEON_RECLOCK_DELAY_MS 200
+
+static void radeon_pm_check_limits(struct radeon_device *rdev);
+static void radeon_pm_set_clocks_locked(struct radeon_device *rdev);
+static void radeon_pm_set_clocks(struct radeon_device *rdev);
+static void radeon_pm_reclock_work_handler(struct work_struct *work);
+static void radeon_pm_idle_work_handler(struct work_struct *work);
+static int radeon_debugfs_pm_init(struct radeon_device *rdev);
+
+static const char *pm_state_names[4] = {
+	"PM_STATE_DISABLED",
+	"PM_STATE_MINIMUM",
+	"PM_STATE_PAUSED",
+	"PM_STATE_ACTIVE"
+};
 
 int radeon_pm_init(struct radeon_device *rdev)
 {
+	rdev->pm.state = PM_STATE_DISABLED;
+	rdev->pm.planned_action = PM_ACTION_NONE;
+	rdev->pm.downclocked = false;
+	rdev->pm.vblank_callback = false;
+
+	radeon_pm_check_limits(rdev);
+
 	if (radeon_debugfs_pm_init(rdev)) {
 		DRM_ERROR("Failed to register debugfs file for PM!\n");
 	}
 
+	INIT_WORK(&rdev->pm.reclock_work, radeon_pm_reclock_work_handler);
+	INIT_DELAYED_WORK(&rdev->pm.idle_work, radeon_pm_idle_work_handler);
+
+	if (radeon_dynpm != -1 && radeon_dynpm) {
+		rdev->pm.state = PM_STATE_PAUSED;
+		DRM_INFO("radeon: dynamic power management enabled\n");
+	}
+
+	DRM_INFO("radeon: power management initialized\n");
+
 	return 0;
 }
 
+static void radeon_pm_check_limits(struct radeon_device *rdev)
+{
+	rdev->pm.min_gpu_engine_clock = rdev->clock.default_sclk - 5000;
+	rdev->pm.min_gpu_memory_clock = rdev->clock.default_mclk - 5000;
+}
+
+void radeon_pm_compute_clocks(struct radeon_device *rdev)
+{
+	struct drm_device *ddev = rdev->ddev;
+	struct drm_connector *connector;
+	struct radeon_crtc *radeon_crtc;
+	int count = 0;
+
+	if (rdev->pm.state == PM_STATE_DISABLED)
+		return;
+
+	mutex_lock(&rdev->pm.mutex);
+
+	rdev->pm.active_crtcs = 0;
+	list_for_each_entry(connector,
+		&ddev->mode_config.connector_list, head) {
+		if (connector->encoder &&
+			connector->dpms != DRM_MODE_DPMS_OFF) {
+			radeon_crtc = to_radeon_crtc(connector->encoder->crtc);
+			rdev->pm.active_crtcs |= (1 << radeon_crtc->crtc_id);
+			++count;
+		}
+	}
+
+	if (count > 1) {
+		if (rdev->pm.state == PM_STATE_ACTIVE) {
+			wait_queue_head_t wait;
+			init_waitqueue_head(&wait);
+
+			cancel_delayed_work(&rdev->pm.idle_work);
+
+			rdev->pm.state = PM_STATE_PAUSED;
+			rdev->pm.planned_action = PM_ACTION_UPCLOCK;
+			rdev->pm.vblank_callback = true;
+
+			mutex_unlock(&rdev->pm.mutex);
+
+			wait_event_timeout(wait, !rdev->pm.downclocked,
+				msecs_to_jiffies(300));
+			if (!rdev->pm.downclocked)
+				radeon_pm_set_clocks(rdev);
+
+			DRM_DEBUG("radeon: dynamic power management deactivated\n");
+		} else {
+			mutex_unlock(&rdev->pm.mutex);
+		}
+	} else if (count == 1) {
+		rdev->pm.min_mode_engine_clock = rdev->pm.min_gpu_engine_clock;
+		rdev->pm.min_mode_memory_clock = rdev->pm.min_gpu_memory_clock;
+		/* TODO: Increase clocks if needed for current mode */
+
+		if (rdev->pm.state == PM_STATE_MINIMUM) {
+			rdev->pm.state = PM_STATE_ACTIVE;
+			rdev->pm.planned_action = PM_ACTION_UPCLOCK;
+			radeon_pm_set_clocks_locked(rdev);
+
+			queue_delayed_work(rdev->wq, &rdev->pm.idle_work,
+				msecs_to_jiffies(RADEON_IDLE_LOOP_MS));
+		}
+		else if (rdev->pm.state == PM_STATE_PAUSED) {
+			rdev->pm.state = PM_STATE_ACTIVE;
+			queue_delayed_work(rdev->wq, &rdev->pm.idle_work,
+				msecs_to_jiffies(RADEON_IDLE_LOOP_MS));
+			DRM_DEBUG("radeon: dynamic power management activated\n");
+		}
+
+		mutex_unlock(&rdev->pm.mutex);
+	}
+	else { /* count == 0 */
+		if (rdev->pm.state != PM_STATE_MINIMUM) {
+			cancel_delayed_work(&rdev->pm.idle_work);
+
+			rdev->pm.state = PM_STATE_MINIMUM;
+			rdev->pm.planned_action = PM_ACTION_MINIMUM;
+			radeon_pm_set_clocks_locked(rdev);
+		}
+
+		mutex_unlock(&rdev->pm.mutex);
+	}
+}
+
+static void radeon_pm_set_clocks_locked(struct radeon_device *rdev)
+{
+	/*radeon_fence_wait_last(rdev);*/
+	switch (rdev->pm.planned_action) {
+	case PM_ACTION_UPCLOCK:
+		radeon_set_engine_clock(rdev, rdev->clock.default_sclk);
+		rdev->pm.downclocked = false;
+		break;
+	case PM_ACTION_DOWNCLOCK:
+		radeon_set_engine_clock(rdev,
+			rdev->pm.min_mode_engine_clock);
+		rdev->pm.downclocked = true;
+		break;
+	case PM_ACTION_MINIMUM:
+		radeon_set_engine_clock(rdev,
+			rdev->pm.min_gpu_engine_clock);
+		break;
+	case PM_ACTION_NONE:
+		DRM_ERROR("%s: PM_ACTION_NONE\n", __func__);
+		break;
+	}
+
+	rdev->pm.planned_action = PM_ACTION_NONE;
+}
+
+static void radeon_pm_set_clocks(struct radeon_device *rdev)
+{
+	mutex_lock(&rdev->pm.mutex);
+	/* new VBLANK irq may come before handling previous one */
+	if (rdev->pm.vblank_callback) {
+		mutex_lock(&rdev->cp.mutex);
+		if (rdev->pm.req_vblank & (1 << 0)) {
+			rdev->pm.req_vblank &= ~(1 << 0);
+			drm_vblank_put(rdev->ddev, 0);
+		}
+		if (rdev->pm.req_vblank & (1 << 1)) {
+			rdev->pm.req_vblank &= ~(1 << 1);
+			drm_vblank_put(rdev->ddev, 1);
+		}
+		rdev->pm.vblank_callback = false;
+		radeon_pm_set_clocks_locked(rdev);
+		mutex_unlock(&rdev->cp.mutex);
+	}
+	mutex_unlock(&rdev->pm.mutex);
+}
+
+static void radeon_pm_reclock_work_handler(struct work_struct *work)
+{
+	struct radeon_device *rdev;
+	rdev = container_of(work, struct radeon_device,
+				pm.reclock_work);
+	radeon_pm_set_clocks(rdev);
+}
+
+static void radeon_pm_idle_work_handler(struct work_struct *work)
+{
+	struct radeon_device *rdev;
+	rdev = container_of(work, struct radeon_device,
+				pm.idle_work.work);
+
+	mutex_lock(&rdev->pm.mutex);
+	if (rdev->pm.state == PM_STATE_ACTIVE &&
+		!rdev->pm.vblank_callback) {
+		unsigned long irq_flags;
+		int not_processed = 0;
+
+		read_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+		if (!list_empty(&rdev->fence_drv.emited)) {
+			struct list_head *ptr;
+			list_for_each(ptr, &rdev->fence_drv.emited) {
+				/* count up to 3, that's enought info */
+				if (++not_processed >= 3)
+					break;
+			}
+		}
+		read_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+
+		if (not_processed >= 3) { /* should upclock */
+			if (rdev->pm.planned_action == PM_ACTION_DOWNCLOCK) {
+				rdev->pm.planned_action = PM_ACTION_NONE;
+			} else if (rdev->pm.planned_action == PM_ACTION_NONE &&
+				rdev->pm.downclocked) {
+				rdev->pm.planned_action =
+					PM_ACTION_UPCLOCK;
+				rdev->pm.action_timeout = jiffies +
+				msecs_to_jiffies(RADEON_RECLOCK_DELAY_MS);
+			}
+		} else if (not_processed == 0) { /* should downclock */
+			if (rdev->pm.planned_action == PM_ACTION_UPCLOCK) {
+				rdev->pm.planned_action = PM_ACTION_NONE;
+			} else if (rdev->pm.planned_action == PM_ACTION_NONE &&
+				!rdev->pm.downclocked) {
+				rdev->pm.planned_action =
+					PM_ACTION_DOWNCLOCK;
+				rdev->pm.action_timeout = jiffies +
+				msecs_to_jiffies(RADEON_RECLOCK_DELAY_MS);
+			}
+		}
+
+		if (rdev->pm.planned_action != PM_ACTION_NONE &&
+			jiffies > rdev->pm.action_timeout) {
+			if (rdev->pm.active_crtcs & (1 << 0)) {
+				rdev->pm.req_vblank |= (1 << 0);
+				drm_vblank_get(rdev->ddev, 0);
+			}
+			if (rdev->pm.active_crtcs & (1 << 1)) {
+				rdev->pm.req_vblank |= (1 << 1);
+				drm_vblank_get(rdev->ddev, 1);
+			}
+			rdev->pm.vblank_callback = true;
+		}
+	}
+	mutex_unlock(&rdev->pm.mutex);
+
+	queue_delayed_work(rdev->wq, &rdev->pm.idle_work,
+					msecs_to_jiffies(RADEON_IDLE_LOOP_MS));
+}
+
 /*
  * Debugfs info
  */
@@ -44,6 +280,7 @@
 	struct drm_device *dev = node->minor->dev;
 	struct radeon_device *rdev = dev->dev_private;
 
+	seq_printf(m, "state: %s\n", pm_state_names[rdev->pm.state]);
 	seq_printf(m, "default engine clock: %u0 kHz\n", rdev->clock.default_sclk);
 	seq_printf(m, "current engine clock: %u0 kHz\n", radeon_get_engine_clock(rdev));
 	seq_printf(m, "default memory clock: %u0 kHz\n", rdev->clock.default_mclk);
@@ -58,7 +295,7 @@
 };
 #endif
 
-int radeon_debugfs_pm_init(struct radeon_device *rdev)
+static int radeon_debugfs_pm_init(struct radeon_device *rdev)
 {
 #if defined(CONFIG_DEBUG_FS)
 	return radeon_debugfs_add_files(rdev, radeon_pm_info_list, ARRAY_SIZE(radeon_pm_info_list));
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index c381856..a581fde 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -406,10 +406,16 @@
 		if (G_000044_SW_INT(status))
 			radeon_fence_process(rdev);
 		/* Vertical blank interrupts */
-		if (G_007EDC_LB_D1_VBLANK_INTERRUPT(r500_disp_int))
+		if (G_007EDC_LB_D1_VBLANK_INTERRUPT(r500_disp_int)) {
 			drm_handle_vblank(rdev->ddev, 0);
-		if (G_007EDC_LB_D2_VBLANK_INTERRUPT(r500_disp_int))
+			if (rdev->pm.vblank_callback)
+				queue_work(rdev->wq, &rdev->pm.reclock_work);
+		}
+		if (G_007EDC_LB_D2_VBLANK_INTERRUPT(r500_disp_int)) {
 			drm_handle_vblank(rdev->ddev, 1);
+			if (rdev->pm.vblank_callback)
+				queue_work(rdev->wq, &rdev->pm.reclock_work);
+		}
 		if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(r500_disp_int)) {
 			queue_hotplug = true;
 			DRM_DEBUG("HPD1\n");