drm/radeon: add support for SET_APPEND_CNT packet3 (v2)

This adds support to the command parser for the SET_APPEND_CNT (set
append counter) packet3, which is required to support atomic counters
on evergreen/cayman GPUs.

v2: replace some of the hardcoded numbers with real register names
(Christian)

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 9e93205..18ddd75 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2608,6 +2608,51 @@
 			}
 		}
 		break;
+	case PACKET3_SET_APPEND_CNT:
+	{
+		uint32_t areg;
+		uint32_t allowed_reg_base;
+		uint32_t source_sel;
+		if (pkt->count != 2) {
+			DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
+			return -EINVAL;
+		}
+
+		allowed_reg_base = GDS_APPEND_COUNT_0;
+		allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
+		allowed_reg_base >>= 2;
+
+		areg = idx_value >> 16;
+		if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
+			dev_warn(p->dev, "forbidden register for append cnt 0x%08x at %d\n",
+				 areg, idx);
+			return -EINVAL;
+		}
+
+		source_sel = G_PACKET3_SET_APPEND_CNT_SRC_SELECT(idx_value);
+		if (source_sel == PACKET3_SAC_SRC_SEL_MEM) {
+			uint64_t offset;
+			uint32_t swap;
+			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
+			if (r) {
+				DRM_ERROR("bad SET_APPEND_CNT (missing reloc)\n");
+				return -EINVAL;
+			}
+			offset = radeon_get_ib_value(p, idx + 1);
+			swap = offset & 0x3;
+			offset &= ~0x3;
+
+			offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32;
+
+			offset += reloc->gpu_offset;
+			ib[idx+1] = (offset & 0xfffffffc) | swap;
+			ib[idx+2] = upper_32_bits(offset) & 0xff;
+		} else {
+			DRM_ERROR("bad SET_APPEND_CNT (unsupported operation)\n");
+			return -EINVAL;
+		}
+		break;
+	}
 	case PACKET3_NOP:
 		break;
 	default: