drm/radeon/cik: Add support for compute queues (v4)

On CIK, the compute rings work slightly differently than
on previous asics, however the basic concepts are the same.

The main differences:
- New MEC engines for compute queues
- Multiple queues per MEC:
  - CI/KB: 1 MEC, 4 pipes per MEC, 8 queues per pipe = 32 queues
  -    KV: 2 MEC, 4 pipes per MEC, 8 queues per pipe = 64 queues
- Queues can be allocated and scheduled by another queue
- New doorbell aperture allows you to assign space in the aperture
  for the wptr which allows for userspace access to queues

v2: add wptr shadow, fix eop setup
v3: fix comment
v4: switch to new callback method

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h
index 79be39e..63514b9 100644
--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@@ -460,6 +460,13 @@
 #       define RDERR_INT_ENABLE                         (1 << 0)
 #       define GUI_IDLE_INT_ENABLE                      (1 << 19)
 
+#define CP_CPC_STATUS					0x8210
+#define CP_CPC_BUSY_STAT				0x8214
+#define CP_CPC_STALLED_STAT1				0x8218
+#define CP_CPF_STATUS					0x821c
+#define CP_CPF_BUSY_STAT				0x8220
+#define CP_CPF_STALLED_STAT1				0x8224
+
 #define CP_MEC_CNTL					0x8234
 #define		MEC_ME2_HALT					(1 << 28)
 #define		MEC_ME1_HALT					(1 << 30)
@@ -468,6 +475,12 @@
 #define		MEC_ME2_HALT					(1 << 28)
 #define		MEC_ME1_HALT					(1 << 30)
 
+#define CP_STALLED_STAT3				0x8670
+#define CP_STALLED_STAT1				0x8674
+#define CP_STALLED_STAT2				0x8678
+
+#define CP_STAT						0x8680
+
 #define CP_ME_CNTL					0x86D8
 #define		CP_CE_HALT					(1 << 24)
 #define		CP_PFP_HALT					(1 << 26)
@@ -701,6 +714,11 @@
 #       define CP_RINGID1_INT_STAT                      (1 << 30)
 #       define CP_RINGID0_INT_STAT                      (1 << 31)
 
+#define CP_CPF_DEBUG                                    0xC200
+
+#define CP_PQ_WPTR_POLL_CNTL                            0xC20C
+#define		WPTR_POLL_EN      			(1 << 31)
+
 #define CP_ME1_PIPE0_INT_CNTL                           0xC214
 #define CP_ME1_PIPE1_INT_CNTL                           0xC218
 #define CP_ME1_PIPE2_INT_CNTL                           0xC21C
@@ -773,6 +791,50 @@
 #define RLC_GPM_SCRATCH_ADDR                              0xC4B0
 #define RLC_GPM_SCRATCH_DATA                              0xC4B4
 
+#define CP_HPD_EOP_BASE_ADDR                              0xC904
+#define CP_HPD_EOP_BASE_ADDR_HI                           0xC908
+#define CP_HPD_EOP_VMID                                   0xC90C
+#define CP_HPD_EOP_CONTROL                                0xC910
+#define		EOP_SIZE(x)				((x) << 0)
+#define		EOP_SIZE_MASK      			(0x3f << 0)
+#define CP_MQD_BASE_ADDR                                  0xC914
+#define CP_MQD_BASE_ADDR_HI                               0xC918
+#define CP_HQD_ACTIVE                                     0xC91C
+#define CP_HQD_VMID                                       0xC920
+
+#define CP_HQD_PQ_BASE                                    0xC934
+#define CP_HQD_PQ_BASE_HI                                 0xC938
+#define CP_HQD_PQ_RPTR                                    0xC93C
+#define CP_HQD_PQ_RPTR_REPORT_ADDR                        0xC940
+#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI                     0xC944
+#define CP_HQD_PQ_WPTR_POLL_ADDR                          0xC948
+#define CP_HQD_PQ_WPTR_POLL_ADDR_HI                       0xC94C
+#define CP_HQD_PQ_DOORBELL_CONTROL                        0xC950
+#define		DOORBELL_OFFSET(x)			((x) << 2)
+#define		DOORBELL_OFFSET_MASK			(0x1fffff << 2)
+#define		DOORBELL_SOURCE      			(1 << 28)
+#define		DOORBELL_SCHD_HIT      			(1 << 29)
+#define		DOORBELL_EN      			(1 << 30)
+#define		DOORBELL_HIT      			(1 << 31)
+#define CP_HQD_PQ_WPTR                                    0xC954
+#define CP_HQD_PQ_CONTROL                                 0xC958
+#define		QUEUE_SIZE(x)				((x) << 0)
+#define		QUEUE_SIZE_MASK      			(0x3f << 0)
+#define		RPTR_BLOCK_SIZE(x)			((x) << 8)
+#define		RPTR_BLOCK_SIZE_MASK			(0x3f << 8)
+#define		PQ_VOLATILE      			(1 << 26)
+#define		NO_UPDATE_RPTR      			(1 << 27)
+#define		UNORD_DISPATCH      			(1 << 28)
+#define		ROQ_PQ_IB_FLIP      			(1 << 29)
+#define		PRIV_STATE      			(1 << 30)
+#define		KMD_QUEUE      				(1 << 31)
+
+#define CP_HQD_DEQUEUE_REQUEST                          0xC974
+
+#define CP_MQD_CONTROL                                  0xC99C
+#define		MQD_VMID(x)				((x) << 0)
+#define		MQD_VMID_MASK      			(0xf << 0)
+
 #define PA_SC_RASTER_CONFIG                             0x28350
 #       define RASTER_CONFIG_RB_MAP_0                   0
 #       define RASTER_CONFIG_RB_MAP_1                   1