drm/radeon: properly set up the RLC on ON/LN/TN (v3)

This is required for certain advanced functionality, such as the GFX
power gating the RLC drives on these IGP parts.

v2: save/restore list takes dword offsets
v3: rebase on gpu reset changes

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 6b559cb5..b9f64f0 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -45,6 +45,94 @@
 	EVERGREEN_CRTC5_REGISTER_OFFSET
 };
 
+#include "clearstate_evergreen.h"
+
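+/* Registers the RLC saves and restores around GFX power gating.  The
+ * list holds byte offsets; sumo_rlc_init() converts them to dword
+ * offsets when building the save/restore buffer.
+ */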
+static u32 sumo_rlc_save_restore_register_list[] =
+{
+	0x98fc,
+	0x9830,
+	0x9834,
+	0x9838,
+	0x9870,
+	0x9874,
+	0x8a14,
+	0x8b24,
+	0x8bcc,
+	0x8b10,
+	0x8d00,
+	0x8d04,
+	0x8c00,
+	0x8c04,
+	0x8c08,
+	0x8c0c,
+	0x8d8c,
+	0x8c20,
+	0x8c24,
+	0x8c28,
+	0x8c18,
+	0x8c1c,
+	0x8cf0,
+	0x8e2c,
+	0x8e38,
+	0x8c30,
+	0x9508,
+	0x9688,
+	0x9608,
+	0x960c,
+	0x9610,
+	0x9614,
+	0x88c4,
+	0x88d4,
+	0xa008,
+	0x900c,
+	0x9100,
+	0x913c,
+	0x98f8,
+	0x98f4,
+	0x9b7c,
+	0x3f8c,
+	0x8950,
+	0x8954,
+	0x8a18,
+	0x8b28,
+	0x9144,
+	0x9148,
+	0x914c,
+	0x3f90,
+	0x3f94,
+	0x915c,
+	0x9160,
+	0x9178,
+	0x917c,
+	0x9180,
+	0x918c,
+	0x9190,
+	0x9194,
+	0x9198,
+	0x919c,
+	0x91a8,
+	0x91ac,
+	0x91b0,
+	0x91b4,
+	0x91b8,
+	0x91c4,
+	0x91c8,
+	0x91cc,
+	0x91d0,
+	0x91d4,
+	0x91e0,
+	0x91e4,
+	0x91ec,
+	0x91f0,
+	0x91f4,
+	0x9200,
+	0x9204,
+	0x929c,
+	0x9150,
+	0x802c,
+};
+static u32 sumo_rlc_save_restore_register_list_size = ARRAY_SIZE(sumo_rlc_save_restore_register_list);
+
 static void evergreen_gpu_init(struct radeon_device *rdev);
 void evergreen_fini(struct radeon_device *rdev);
 void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
@@ -3723,6 +3811,241 @@
 	return radeon_ring_test_lockup(rdev, ring);
 }
 
+/*
+ * RLC
+ */
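+/* markers written to terminate the save/restore list and the clear
+ * state header block, respectively
+ */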
+#define RLC_SAVE_RESTORE_LIST_END_MARKER    0x00000000
+#define RLC_CLEAR_STATE_END_MARKER          0x00000001
+
+void sumo_rlc_fini(struct radeon_device *rdev)
+{
+	int r;
+
+	/* save restore block */
+	if (rdev->rlc.save_restore_obj) {
+		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
+		if (unlikely(r != 0))
+			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
+		radeon_bo_unpin(rdev->rlc.save_restore_obj);
+		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+
+		radeon_bo_unref(&rdev->rlc.save_restore_obj);
+		rdev->rlc.save_restore_obj = NULL;
+	}
+
+	/* clear state block */
+	if (rdev->rlc.clear_state_obj) {
+		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
+		if (unlikely(r != 0))
+			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
+		radeon_bo_unpin(rdev->rlc.clear_state_obj);
+		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+
+		radeon_bo_unref(&rdev->rlc.clear_state_obj);
+		rdev->rlc.clear_state_obj = NULL;
+	}
+}
+
+int sumo_rlc_init(struct radeon_device *rdev)
+{
+	u32 *src_ptr;
+	volatile u32 *dst_ptr;
+	u32 dws, data, i, j, k, reg_num;
+	u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
+	u64 reg_list_mc_addr;
+	struct cs_section_def *cs_data;
+	int r;
+
+	src_ptr = rdev->rlc.reg_list;
+	dws = rdev->rlc.reg_list_size;
+	cs_data = rdev->rlc.cs_data;
+
+	/* save restore block */
+	if (rdev->rlc.save_restore_obj == NULL) {
+		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
+				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.save_restore_obj);
+		if (r) {
+			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
+			return r;
+		}
+	}
+
+	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
+	if (unlikely(r != 0)) {
+		sumo_rlc_fini(rdev);
+		return r;
+	}
+	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
+			  &rdev->rlc.save_restore_gpu_addr);
+	if (r) {
+		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
+		sumo_rlc_fini(rdev);
+		return r;
+	}
+	r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
+	if (r) {
+		dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
+		sumo_rlc_fini(rdev);
+		return r;
+	}
+	/* write the sr buffer */
+	dst_ptr = rdev->rlc.sr_ptr;
+	/* format:
+	 * dw0: (reg2 << 16) | reg1
+	 * dw1: reg1 save space
+	 * dw2: reg2 save space
+	 */
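+	/* each register pair consumes three dwords (one packed header
+	 * plus two save slots), so the headers land at dword offsets
+	 * 0, 3, 6, ...
+	 */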
+	for (i = 0; i < dws; i++) {
+		data = src_ptr[i] >> 2;
+		i++;
+		if (i < dws)
+			data |= (src_ptr[i] >> 2) << 16;
+		j = (((i - 1) * 3) / 2);
+		dst_ptr[j] = data;
+	}
+	j = ((i * 3) / 2);
+	dst_ptr[j] = RLC_SAVE_RESTORE_LIST_END_MARKER;
+
+	radeon_bo_kunmap(rdev->rlc.save_restore_obj);
+	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+
+	/* clear state block */
+	reg_list_num = 0;
+	dws = 0;
+	for (i = 0; cs_data[i].section != NULL; i++) {
+		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
+			reg_list_num++;
+			dws += cs_data[i].section[j].reg_count;
+		}
+	}
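+	/* header block: 1 dword for the upper 32 bits of the data base
+	 * address, 3 dwords per register extent, 1 dword end marker
+	 */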
+	reg_list_blk_index = (3 * reg_list_num + 2);
+	dws += reg_list_blk_index;
+
+	if (rdev->rlc.clear_state_obj == NULL) {
+		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
+				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
+		if (r) {
+			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
+			sumo_rlc_fini(rdev);
+			return r;
+		}
+	}
+	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
+	if (unlikely(r != 0)) {
+		sumo_rlc_fini(rdev);
+		return r;
+	}
+	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
+			  &rdev->rlc.clear_state_gpu_addr);
+	if (r) {
+		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
+		sumo_rlc_fini(rdev);
+		return r;
+	}
+	r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
+	if (r) {
+		dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
+		sumo_rlc_fini(rdev);
+		return r;
+	}
+	/* set up the cs buffer */
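+	/* layout: one dword holding the upper 32 bits of the data base
+	 * address, then for each extent {lower 32 bits of that extent's
+	 * data address, register offset in bytes, 0x08000000 | length in
+	 * bytes}, the end marker, and finally the register data itself
+	 */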
+	dst_ptr = rdev->rlc.cs_ptr;
+	reg_list_hdr_blk_index = 0;
+	reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
+	data = upper_32_bits(reg_list_mc_addr);
+	dst_ptr[reg_list_hdr_blk_index] = data;
+	reg_list_hdr_blk_index++;
+	for (i = 0; cs_data[i].section != NULL; i++) {
+		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
+			reg_num = cs_data[i].section[j].reg_count;
+			data = reg_list_mc_addr & 0xffffffff;
+			dst_ptr[reg_list_hdr_blk_index] = data;
+			reg_list_hdr_blk_index++;
+
+			data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
+			dst_ptr[reg_list_hdr_blk_index] = data;
+			reg_list_hdr_blk_index++;
+
+			data = 0x08000000 | (reg_num * 4);
+			dst_ptr[reg_list_hdr_blk_index] = data;
+			reg_list_hdr_blk_index++;
+
+			for (k = 0; k < reg_num; k++) {
+				data = cs_data[i].section[j].extent[k];
+				dst_ptr[reg_list_blk_index + k] = data;
+			}
+			reg_list_mc_addr += reg_num * 4;
+			reg_list_blk_index += reg_num;
+		}
+	}
+	dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
+
+	radeon_bo_kunmap(rdev->rlc.clear_state_obj);
+	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+
+	return 0;
+}
+
+static void evergreen_rlc_start(struct radeon_device *rdev)
+{
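+	/* on IGP parts the RLC also drives GFX power gating */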
+	if (rdev->flags & RADEON_IS_IGP)
+		WREG32(RLC_CNTL, RLC_ENABLE | GFX_POWER_GATING_ENABLE | GFX_POWER_GATING_SRC);
+	else
+		WREG32(RLC_CNTL, RLC_ENABLE);
+}
+
+int evergreen_rlc_resume(struct radeon_device *rdev)
+{
+	u32 i;
+	const __be32 *fw_data;
+
+	if (!rdev->rlc_fw)
+		return -EINVAL;
+
+	r600_rlc_stop(rdev);
+
+	WREG32(RLC_HB_CNTL, 0);
+
+	if (rdev->flags & RADEON_IS_IGP) {
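+		/* both base addresses are programmed in 256-byte units */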
+		WREG32(TN_RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+		WREG32(TN_RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
+	} else {
+		WREG32(RLC_HB_BASE, 0);
+		WREG32(RLC_HB_RPTR, 0);
+		WREG32(RLC_HB_WPTR, 0);
+	}
+	WREG32(RLC_HB_WPTR_LSB_ADDR, 0);
+	WREG32(RLC_HB_WPTR_MSB_ADDR, 0);
+	WREG32(RLC_MC_CNTL, 0);
+	WREG32(RLC_UCODE_CNTL, 0);
+
+	fw_data = (const __be32 *)rdev->rlc_fw->data;
+	if (rdev->family >= CHIP_ARUBA) {
+		for (i = 0; i < ARUBA_RLC_UCODE_SIZE; i++) {
+			WREG32(RLC_UCODE_ADDR, i);
+			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
+		}
+	} else if (rdev->family >= CHIP_CAYMAN) {
+		for (i = 0; i < CAYMAN_RLC_UCODE_SIZE; i++) {
+			WREG32(RLC_UCODE_ADDR, i);
+			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
+		}
+	} else {
+		for (i = 0; i < EVERGREEN_RLC_UCODE_SIZE; i++) {
+			WREG32(RLC_UCODE_ADDR, i);
+			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
+		}
+	}
+	WREG32(RLC_UCODE_ADDR, 0);
+
+	evergreen_rlc_start(rdev);
+
+	return 0;
+}
+
 /* Interrupts */
 
 u32 evergreen_get_vblank_counter(struct radeon_device *rdev, int crtc)
@@ -4721,6 +5044,18 @@
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
 
+	/* allocate rlc buffers */
+	if (rdev->flags & RADEON_IS_IGP) {
+		rdev->rlc.reg_list = sumo_rlc_save_restore_register_list;
+		rdev->rlc.reg_list_size = sumo_rlc_save_restore_register_list_size;
+		rdev->rlc.cs_data = evergreen_cs_data;
+		r = sumo_rlc_init(rdev);
+		if (r) {
+			DRM_ERROR("Failed to init rlc BOs!\n");
+			return r;
+		}
+	}
+
 	/* allocate wb buffer */
 	r = radeon_wb_init(rdev);
 	if (r)
@@ -4952,6 +5287,8 @@
 		r700_cp_fini(rdev);
 		r600_dma_fini(rdev);
 		r600_irq_fini(rdev);
+		if (rdev->flags & RADEON_IS_IGP)
+			sumo_rlc_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
 		radeon_irq_kms_fini(rdev);
@@ -4980,6 +5317,8 @@
 	r700_cp_fini(rdev);
 	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
+	if (rdev->flags & RADEON_IS_IGP)
+		sumo_rlc_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);