1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/module.h>
28#include "drmP.h"
29#include "radeon.h"
30#include "radeon_asic.h"
31#include "cikd.h"
32#include "atom.h"
33#include "cik_blit_shaders.h"
34
35/* GFX */
36#define CIK_PFP_UCODE_SIZE 2144
37#define CIK_ME_UCODE_SIZE 2144
38#define CIK_CE_UCODE_SIZE 2144
39/* compute */
40#define CIK_MEC_UCODE_SIZE 4192
41/* interrupts */
42#define BONAIRE_RLC_UCODE_SIZE 2048
43#define KB_RLC_UCODE_SIZE 2560
44#define KV_RLC_UCODE_SIZE 2560
45/* gddr controller */
46#define CIK_MC_UCODE_SIZE 7866
47/* sdma */
48#define CIK_SDMA_UCODE_SIZE 1050
49#define CIK_SDMA_UCODE_VERSION 64
50
51MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
57MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
58MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
59MODULE_FIRMWARE("radeon/KAVERI_me.bin");
60MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
61MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
62MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
63MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
64MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
65MODULE_FIRMWARE("radeon/KABINI_me.bin");
66MODULE_FIRMWARE("radeon/KABINI_ce.bin");
67MODULE_FIRMWARE("radeon/KABINI_mec.bin");
68MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
69MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
70
71extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72extern void r600_ih_ring_fini(struct radeon_device *rdev);
73extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
74extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
75extern bool evergreen_is_display_hung(struct radeon_device *rdev);
76extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
77extern void si_rlc_fini(struct radeon_device *rdev);
78extern int si_rlc_init(struct radeon_device *rdev);
79static void cik_rlc_stop(struct radeon_device *rdev);
80
81/*
82 * Indirect register accessors
83 */
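/**
 * cik_pciep_rreg - read a PCIE port indirect register
 *
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 *
 * Reads an indirect PCIE port register by writing the register offset
 * to PCIE_INDEX and reading the value back through PCIE_DATA (CIK).
 * Returns the 32-bit register value.
 */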
84u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
85{
86 u32 r;
87
88 WREG32(PCIE_INDEX, reg);
89 (void)RREG32(PCIE_INDEX);
90 r = RREG32(PCIE_DATA);
91 return r;
92}
93
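/**
 * cik_pciep_wreg - write a PCIE port indirect register
 *
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 * @v: value to write
 *
 * Writes @v to an indirect PCIE port register by programming PCIE_INDEX
 * and then PCIE_DATA; the read-backs flush the posted writes (CIK).
 */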
94void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
95{
96 WREG32(PCIE_INDEX, reg);
97 (void)RREG32(PCIE_INDEX);
98 WREG32(PCIE_DATA, v);
99 (void)RREG32(PCIE_DATA);
100}
101
102/**
103 * cik_get_xclk - get the xclk
104 *
105 * @rdev: radeon_device pointer
106 *
107 * Returns the reference clock used by the gfx engine
108 * (CIK).
109 */
110u32 cik_get_xclk(struct radeon_device *rdev)
111{
112 u32 reference_clock = rdev->clock.spll.reference_freq;
113
114 if (rdev->flags & RADEON_IS_IGP) {
115 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
116 return reference_clock / 2;
117 } else {
118 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
119 return reference_clock / 4;
120 }
121 return reference_clock;
122}
123
124/**
125 * cik_mm_rdoorbell - read a doorbell dword
126 *
127 * @rdev: radeon_device pointer
128 * @offset: byte offset into the aperture
129 *
130 * Returns the value in the doorbell aperture at the
131 * requested offset (CIK).
132 */
133u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
134{
135 if (offset < rdev->doorbell.size) {
136 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
137 } else {
138 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
139 return 0;
140 }
141}
142
143/**
144 * cik_mm_wdoorbell - write a doorbell dword
145 *
146 * @rdev: radeon_device pointer
147 * @offset: byte offset into the aperture
148 * @v: value to write
149 *
150 * Writes @v to the doorbell aperture at the
151 * requested offset (CIK).
152 */
153void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
154{
155 if (offset < rdev->doorbell.size) {
156 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
157 } else {
158 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
159 }
160}
161
162#define BONAIRE_IO_MC_REGS_SIZE 36
163
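/* MC_SEQ_IO_DEBUG index/data pairs written to the memory controller by
 * ci_mc_load_microcode() before the MC ucode itself is loaded.
 */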
164static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
165{
166 {0x00000070, 0x04400000},
167 {0x00000071, 0x80c01803},
168 {0x00000072, 0x00004004},
169 {0x00000073, 0x00000100},
170 {0x00000074, 0x00ff0000},
171 {0x00000075, 0x34000000},
172 {0x00000076, 0x08000014},
173 {0x00000077, 0x00cc08ec},
174 {0x00000078, 0x00000400},
175 {0x00000079, 0x00000000},
176 {0x0000007a, 0x04090000},
177 {0x0000007c, 0x00000000},
178 {0x0000007e, 0x4408a8e8},
179 {0x0000007f, 0x00000304},
180 {0x00000080, 0x00000000},
181 {0x00000082, 0x00000001},
182 {0x00000083, 0x00000002},
183 {0x00000084, 0xf3e4f400},
184 {0x00000085, 0x052024e3},
185 {0x00000087, 0x00000000},
186 {0x00000088, 0x01000000},
187 {0x0000008a, 0x1c0a0000},
188 {0x0000008b, 0xff010000},
189 {0x0000008d, 0xffffefff},
190 {0x0000008e, 0xfff3efff},
191 {0x0000008f, 0xfff3efbf},
192 {0x00000092, 0xf7ffffff},
193 {0x00000093, 0xffffff7f},
194 {0x00000095, 0x00101101},
195 {0x00000096, 0x00000fff},
196 {0x00000097, 0x00116fff},
197 {0x00000098, 0x60010000},
198 {0x00000099, 0x10010000},
199 {0x0000009a, 0x00006000},
200 {0x0000009b, 0x00001000},
201 {0x0000009f, 0x00b48000}
202};
203
204/**
205 * cik_srbm_select - select specific register instances
206 *
207 * @rdev: radeon_device pointer
208 * @me: selected ME (micro engine)
209 * @pipe: pipe
210 * @queue: queue
211 * @vmid: VMID
212 *
213 * Switches the currently active register instances. Some
214 * registers are instanced per VMID, others are instanced per
215 * me/pipe/queue combination.
216 */
217static void cik_srbm_select(struct radeon_device *rdev,
218 u32 me, u32 pipe, u32 queue, u32 vmid)
219{
220 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
221 MEID(me & 0x3) |
222 VMID(vmid & 0xf) |
223 QUEUEID(queue & 0x7));
224 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
225}
226
227/* ucode loading */
228/**
229 * ci_mc_load_microcode - load MC ucode into the hw
230 *
231 * @rdev: radeon_device pointer
232 *
233 * Load the GDDR MC ucode into the hw (CIK).
234 * Returns 0 on success, error on failure.
235 */
236static int ci_mc_load_microcode(struct radeon_device *rdev)
237{
238 const __be32 *fw_data;
239 u32 running, blackout = 0;
240 u32 *io_mc_regs;
241 int i, ucode_size, regs_size;
242
243 if (!rdev->mc_fw)
244 return -EINVAL;
245
246 switch (rdev->family) {
247 case CHIP_BONAIRE:
248 default:
249 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
250 ucode_size = CIK_MC_UCODE_SIZE;
251 regs_size = BONAIRE_IO_MC_REGS_SIZE;
252 break;
253 }
254
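	/* only program the ucode if the MC sequencer is not already running */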
255 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
256
257 if (running == 0) {
258 if (running) {
259 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
260 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
261 }
262
263 /* reset the engine and set to writable */
264 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
265 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
266
267 /* load mc io regs */
268 for (i = 0; i < regs_size; i++) {
269 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
270 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
271 }
272 /* load the MC ucode */
273 fw_data = (const __be32 *)rdev->mc_fw->data;
274 for (i = 0; i < ucode_size; i++)
275 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
276
277 /* put the engine back into the active state */
278 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
279 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
280 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
281
282 /* wait for training to complete */
283 for (i = 0; i < rdev->usec_timeout; i++) {
284 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
285 break;
286 udelay(1);
287 }
288 for (i = 0; i < rdev->usec_timeout; i++) {
289 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
290 break;
291 udelay(1);
292 }
293
294 if (running)
295 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
296 }
297
298 return 0;
299}
300
301/**
302 * cik_init_microcode - load ucode images from disk
303 *
304 * @rdev: radeon_device pointer
305 *
306 * Use the firmware interface to load the ucode images into
307 * the driver (not loaded into hw).
308 * Returns 0 on success, error on failure.
309 */
310static int cik_init_microcode(struct radeon_device *rdev)
311{
312 struct platform_device *pdev;
313 const char *chip_name;
314 size_t pfp_req_size, me_req_size, ce_req_size,
315 mec_req_size, rlc_req_size, mc_req_size,
316 sdma_req_size;
317 char fw_name[30];
318 int err;
319
320 DRM_DEBUG("\n");
321
322 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
323 err = IS_ERR(pdev);
324 if (err) {
325 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
326 return -EINVAL;
327 }
328
329 switch (rdev->family) {
330 case CHIP_BONAIRE:
331 chip_name = "BONAIRE";
332 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
333 me_req_size = CIK_ME_UCODE_SIZE * 4;
334 ce_req_size = CIK_CE_UCODE_SIZE * 4;
335 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
336 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
337 mc_req_size = CIK_MC_UCODE_SIZE * 4;
338 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
339 break;
340 case CHIP_KAVERI:
341 chip_name = "KAVERI";
342 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
343 me_req_size = CIK_ME_UCODE_SIZE * 4;
344 ce_req_size = CIK_CE_UCODE_SIZE * 4;
345 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
346 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
347 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
348 break;
349 case CHIP_KABINI:
350 chip_name = "KABINI";
351 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
352 me_req_size = CIK_ME_UCODE_SIZE * 4;
353 ce_req_size = CIK_CE_UCODE_SIZE * 4;
354 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
355 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
356 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
357 break;
358 default: BUG();
359 }
360
361 DRM_INFO("Loading %s Microcode\n", chip_name);
362
363 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
364 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
365 if (err)
366 goto out;
367 if (rdev->pfp_fw->size != pfp_req_size) {
368 printk(KERN_ERR
369 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
370 rdev->pfp_fw->size, fw_name);
371 err = -EINVAL;
372 goto out;
373 }
374
375 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
376 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
377 if (err)
378 goto out;
379 if (rdev->me_fw->size != me_req_size) {
380 printk(KERN_ERR
381 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
382 rdev->me_fw->size, fw_name);
383 err = -EINVAL;
384 }
385
386 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
387 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
388 if (err)
389 goto out;
390 if (rdev->ce_fw->size != ce_req_size) {
391 printk(KERN_ERR
392 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
393 rdev->ce_fw->size, fw_name);
394 err = -EINVAL;
395 }
396
397 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
398 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
399 if (err)
400 goto out;
401 if (rdev->mec_fw->size != mec_req_size) {
402 printk(KERN_ERR
403 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
404 rdev->mec_fw->size, fw_name);
405 err = -EINVAL;
406 }
407
408 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
409 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
410 if (err)
411 goto out;
412 if (rdev->rlc_fw->size != rlc_req_size) {
413 printk(KERN_ERR
414 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
415 rdev->rlc_fw->size, fw_name);
416 err = -EINVAL;
417 }
418
419 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
420 err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
421 if (err)
422 goto out;
423 if (rdev->sdma_fw->size != sdma_req_size) {
424 printk(KERN_ERR
425 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
426 rdev->sdma_fw->size, fw_name);
427 err = -EINVAL;
428 }
429
430 /* No MC ucode on APUs */
431 if (!(rdev->flags & RADEON_IS_IGP)) {
432 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
433 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
434 if (err)
435 goto out;
436 if (rdev->mc_fw->size != mc_req_size) {
437 printk(KERN_ERR
438 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
439 rdev->mc_fw->size, fw_name);
440 err = -EINVAL;
441 }
442 }
443
444out:
445 platform_device_unregister(pdev);
446
447 if (err) {
448 if (err != -EINVAL)
449 printk(KERN_ERR
450 "cik_cp: Failed to load firmware \"%s\"\n",
451 fw_name);
452 release_firmware(rdev->pfp_fw);
453 rdev->pfp_fw = NULL;
454 release_firmware(rdev->me_fw);
455 rdev->me_fw = NULL;
456 release_firmware(rdev->ce_fw);
457 rdev->ce_fw = NULL;
458 release_firmware(rdev->rlc_fw);
459 rdev->rlc_fw = NULL;
460 release_firmware(rdev->mc_fw);
461 rdev->mc_fw = NULL;
462 }
463 return err;
464}
465
466/*
467 * Core functions
468 */
469/**
470 * cik_tiling_mode_table_init - init the hw tiling table
471 *
472 * @rdev: radeon_device pointer
473 *
474 * Starting with SI, the tiling setup is done globally in a
475 * set of 32 tiling modes. Rather than selecting each set of
476 * parameters per surface as on older asics, we just select
477 * which index in the tiling table we want to use, and the
478 * surface uses those parameters (CIK).
479 */
480static void cik_tiling_mode_table_init(struct radeon_device *rdev)
481{
482 const u32 num_tile_mode_states = 32;
483 const u32 num_secondary_tile_mode_states = 16;
484 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
485 u32 num_pipe_configs;
486 u32 num_rbs = rdev->config.cik.max_backends_per_se *
487 rdev->config.cik.max_shader_engines;
488
489 switch (rdev->config.cik.mem_row_size_in_kb) {
490 case 1:
491 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
492 break;
493 case 2:
494 default:
495 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
496 break;
497 case 4:
498 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
499 break;
500 }
501
502 num_pipe_configs = rdev->config.cik.max_tile_pipes;
503 if (num_pipe_configs > 8)
504 num_pipe_configs = 8; /* ??? */
505
506 if (num_pipe_configs == 8) {
507 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
508 switch (reg_offset) {
509 case 0:
510 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
512 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
513 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
514 break;
515 case 1:
516 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
519 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
520 break;
521 case 2:
522 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
524 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
525 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
526 break;
527 case 3:
528 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
529 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
530 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
531 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
532 break;
533 case 4:
534 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
536 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
537 TILE_SPLIT(split_equal_to_row_size));
538 break;
539 case 5:
540 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
541 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
542 break;
543 case 6:
544 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
545 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
547 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
548 break;
549 case 7:
550 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
551 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
552 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
553 TILE_SPLIT(split_equal_to_row_size));
554 break;
555 case 8:
556 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
557 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
558 break;
559 case 9:
560 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
561 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
562 break;
563 case 10:
564 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
565 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
568 break;
569 case 11:
570 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
571 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
572 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
574 break;
575 case 12:
576 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
577 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
578 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
579 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
580 break;
581 case 13:
582 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
583 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
584 break;
585 case 14:
586 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
587 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
590 break;
591 case 16:
592 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
593 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
594 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
596 break;
597 case 17:
598 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
599 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
601 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
602 break;
603 case 27:
604 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
605 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
606 break;
607 case 28:
608 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
609 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
612 break;
613 case 29:
614 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
615 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
616 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
618 break;
619 case 30:
620 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
621 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
624 break;
625 default:
626 gb_tile_moden = 0;
627 break;
628 }
629 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
630 }
631 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
632 switch (reg_offset) {
633 case 0:
634 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
635 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
636 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
637 NUM_BANKS(ADDR_SURF_16_BANK));
638 break;
639 case 1:
640 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
641 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
642 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
643 NUM_BANKS(ADDR_SURF_16_BANK));
644 break;
645 case 2:
646 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
647 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
648 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
649 NUM_BANKS(ADDR_SURF_16_BANK));
650 break;
651 case 3:
652 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
653 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
654 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
655 NUM_BANKS(ADDR_SURF_16_BANK));
656 break;
657 case 4:
658 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
659 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
660 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
661 NUM_BANKS(ADDR_SURF_8_BANK));
662 break;
663 case 5:
664 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
667 NUM_BANKS(ADDR_SURF_4_BANK));
668 break;
669 case 6:
670 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
671 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
672 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
673 NUM_BANKS(ADDR_SURF_2_BANK));
674 break;
675 case 8:
676 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
677 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
678 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
679 NUM_BANKS(ADDR_SURF_16_BANK));
680 break;
681 case 9:
682 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
683 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
684 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
685 NUM_BANKS(ADDR_SURF_16_BANK));
686 break;
687 case 10:
688 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
689 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
690 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
691 NUM_BANKS(ADDR_SURF_16_BANK));
692 break;
693 case 11:
694 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
695 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
696 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
697 NUM_BANKS(ADDR_SURF_16_BANK));
698 break;
699 case 12:
700 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
701 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
702 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
703 NUM_BANKS(ADDR_SURF_8_BANK));
704 break;
705 case 13:
706 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
707 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
708 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
709 NUM_BANKS(ADDR_SURF_4_BANK));
710 break;
711 case 14:
712 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
713 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
714 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
715 NUM_BANKS(ADDR_SURF_2_BANK));
716 break;
717 default:
718 gb_tile_moden = 0;
719 break;
720 }
721 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
722 }
723 } else if (num_pipe_configs == 4) {
724 if (num_rbs == 4) {
725 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
726 switch (reg_offset) {
727 case 0:
728 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
729 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
730 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
731 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
732 break;
733 case 1:
734 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
735 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
736 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
737 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
738 break;
739 case 2:
740 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
741 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
742 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
743 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
744 break;
745 case 3:
746 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
747 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
748 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
749 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
750 break;
751 case 4:
752 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
753 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
754 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
755 TILE_SPLIT(split_equal_to_row_size));
756 break;
757 case 5:
758 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
759 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
760 break;
761 case 6:
762 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
763 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
764 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
765 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
766 break;
767 case 7:
768 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
769 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
770 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
771 TILE_SPLIT(split_equal_to_row_size));
772 break;
773 case 8:
774 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
775 PIPE_CONFIG(ADDR_SURF_P4_16x16));
776 break;
777 case 9:
778 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
779 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
780 break;
781 case 10:
782 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
783 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
784 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
785 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
786 break;
787 case 11:
788 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
789 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
790 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
791 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
792 break;
793 case 12:
794 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
795 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
796 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
798 break;
799 case 13:
800 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
801 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
802 break;
803 case 14:
804 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
805 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
806 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
807 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
808 break;
809 case 16:
810 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
811 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
812 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
814 break;
815 case 17:
816 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
817 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
820 break;
821 case 27:
822 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
823 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
824 break;
825 case 28:
826 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
827 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
828 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
830 break;
831 case 29:
832 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
833 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
834 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
836 break;
837 case 30:
838 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
839 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
840 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
842 break;
843 default:
844 gb_tile_moden = 0;
845 break;
846 }
847 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
848 }
849 } else if (num_rbs < 4) {
850 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
851 switch (reg_offset) {
852 case 0:
853 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
854 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
855 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
857 break;
858 case 1:
859 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
860 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
861 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
862 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
863 break;
864 case 2:
865 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
866 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
867 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
868 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
869 break;
870 case 3:
871 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
872 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
873 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
874 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
875 break;
876 case 4:
877 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
878 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
879 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
880 TILE_SPLIT(split_equal_to_row_size));
881 break;
882 case 5:
883 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
884 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
885 break;
886 case 6:
887 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
888 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
889 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
890 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
891 break;
892 case 7:
893 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
894 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
895 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
896 TILE_SPLIT(split_equal_to_row_size));
897 break;
898 case 8:
899 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
900 PIPE_CONFIG(ADDR_SURF_P4_8x16));
901 break;
902 case 9:
903 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
904 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
905 break;
906 case 10:
907 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
908 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
909 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
911 break;
912 case 11:
913 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
914 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
915 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
917 break;
918 case 12:
919 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
920 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
921 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
923 break;
924 case 13:
925 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
926 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
927 break;
928 case 14:
929 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
930 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
931 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
932 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
933 break;
934 case 16:
935 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
936 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
937 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
939 break;
940 case 17:
941 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
942 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
943 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
945 break;
946 case 27:
947 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
948 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
949 break;
950 case 28:
951 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
952 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
953 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
955 break;
956 case 29:
957 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
958 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
959 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
960 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
961 break;
962 case 30:
963 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
964 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
965 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
967 break;
968 default:
969 gb_tile_moden = 0;
970 break;
971 }
972 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
973 }
974 }
975 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
976 switch (reg_offset) {
977 case 0:
978 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
981 NUM_BANKS(ADDR_SURF_16_BANK));
982 break;
983 case 1:
984 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
987 NUM_BANKS(ADDR_SURF_16_BANK));
988 break;
989 case 2:
990 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
991 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
992 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
993 NUM_BANKS(ADDR_SURF_16_BANK));
994 break;
995 case 3:
996 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
997 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
998 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
999 NUM_BANKS(ADDR_SURF_16_BANK));
1000 break;
1001 case 4:
1002 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1003 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1004 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1005 NUM_BANKS(ADDR_SURF_16_BANK));
1006 break;
1007 case 5:
1008 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1011 NUM_BANKS(ADDR_SURF_8_BANK));
1012 break;
1013 case 6:
1014 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1015 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1016 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1017 NUM_BANKS(ADDR_SURF_4_BANK));
1018 break;
1019 case 8:
1020 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1021 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1022 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1023 NUM_BANKS(ADDR_SURF_16_BANK));
1024 break;
1025 case 9:
1026 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1027 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1028 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1029 NUM_BANKS(ADDR_SURF_16_BANK));
1030 break;
1031 case 10:
1032 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1033 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1034 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1035 NUM_BANKS(ADDR_SURF_16_BANK));
1036 break;
1037 case 11:
1038 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1041 NUM_BANKS(ADDR_SURF_16_BANK));
1042 break;
1043 case 12:
1044 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1047 NUM_BANKS(ADDR_SURF_16_BANK));
1048 break;
1049 case 13:
1050 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1051 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1052 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1053 NUM_BANKS(ADDR_SURF_8_BANK));
1054 break;
1055 case 14:
1056 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1059 NUM_BANKS(ADDR_SURF_4_BANK));
1060 break;
1061 default:
1062 gb_tile_moden = 0;
1063 break;
1064 }
1065 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1066 }
1067 } else if (num_pipe_configs == 2) {
1068 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1069 switch (reg_offset) {
1070 case 0:
1071 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1072 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1073 PIPE_CONFIG(ADDR_SURF_P2) |
1074 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1075 break;
1076 case 1:
1077 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1078 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1079 PIPE_CONFIG(ADDR_SURF_P2) |
1080 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1081 break;
1082 case 2:
1083 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1084 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1085 PIPE_CONFIG(ADDR_SURF_P2) |
1086 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1087 break;
1088 case 3:
1089 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1090 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1091 PIPE_CONFIG(ADDR_SURF_P2) |
1092 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1093 break;
1094 case 4:
1095 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1097 PIPE_CONFIG(ADDR_SURF_P2) |
1098 TILE_SPLIT(split_equal_to_row_size));
1099 break;
1100 case 5:
1101 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1102 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1103 break;
1104 case 6:
1105 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1106 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1107 PIPE_CONFIG(ADDR_SURF_P2) |
1108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1109 break;
1110 case 7:
1111 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1112 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1113 PIPE_CONFIG(ADDR_SURF_P2) |
1114 TILE_SPLIT(split_equal_to_row_size));
1115 break;
1116 case 8:
1117 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1118 break;
1119 case 9:
1120 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1121 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1122 break;
1123 case 10:
1124 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1125 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1126 PIPE_CONFIG(ADDR_SURF_P2) |
1127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1128 break;
1129 case 11:
1130 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1131 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1132 PIPE_CONFIG(ADDR_SURF_P2) |
1133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1134 break;
1135 case 12:
1136 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1137 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1138 PIPE_CONFIG(ADDR_SURF_P2) |
1139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1140 break;
1141 case 13:
1142 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1143 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1144 break;
1145 case 14:
1146 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1148 PIPE_CONFIG(ADDR_SURF_P2) |
1149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1150 break;
1151 case 16:
1152 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1153 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1154 PIPE_CONFIG(ADDR_SURF_P2) |
1155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1156 break;
1157 case 17:
1158 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1160 PIPE_CONFIG(ADDR_SURF_P2) |
1161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1162 break;
1163 case 27:
1164 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1165 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1166 break;
1167 case 28:
1168 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1169 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1170 PIPE_CONFIG(ADDR_SURF_P2) |
1171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1172 break;
1173 case 29:
1174 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1175 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1176 PIPE_CONFIG(ADDR_SURF_P2) |
1177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1178 break;
1179 case 30:
1180 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1181 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1182 PIPE_CONFIG(ADDR_SURF_P2) |
1183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1184 break;
1185 default:
1186 gb_tile_moden = 0;
1187 break;
1188 }
1189 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1190 }
1191 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1192 switch (reg_offset) {
1193 case 0:
1194 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1197 NUM_BANKS(ADDR_SURF_16_BANK));
1198 break;
1199 case 1:
1200 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1203 NUM_BANKS(ADDR_SURF_16_BANK));
1204 break;
1205 case 2:
1206 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1209 NUM_BANKS(ADDR_SURF_16_BANK));
1210 break;
1211 case 3:
1212 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1215 NUM_BANKS(ADDR_SURF_16_BANK));
1216 break;
1217 case 4:
1218 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1221 NUM_BANKS(ADDR_SURF_16_BANK));
1222 break;
1223 case 5:
1224 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1227 NUM_BANKS(ADDR_SURF_16_BANK));
1228 break;
1229 case 6:
1230 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1233 NUM_BANKS(ADDR_SURF_8_BANK));
1234 break;
1235 case 8:
1236 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1239 NUM_BANKS(ADDR_SURF_16_BANK));
1240 break;
1241 case 9:
1242 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1245 NUM_BANKS(ADDR_SURF_16_BANK));
1246 break;
1247 case 10:
1248 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1251 NUM_BANKS(ADDR_SURF_16_BANK));
1252 break;
1253 case 11:
1254 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1257 NUM_BANKS(ADDR_SURF_16_BANK));
1258 break;
1259 case 12:
1260 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1263 NUM_BANKS(ADDR_SURF_16_BANK));
1264 break;
1265 case 13:
1266 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1269 NUM_BANKS(ADDR_SURF_16_BANK));
1270 break;
1271 case 14:
1272 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1275 NUM_BANKS(ADDR_SURF_8_BANK));
1276 break;
1277 default:
1278 gb_tile_moden = 0;
1279 break;
1280 }
1281 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1282 }
1283 } else
1284 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1285}
1286
1287/**
1288 * cik_select_se_sh - select which SE, SH to address
1289 *
1290 * @rdev: radeon_device pointer
1291 * @se_num: shader engine to address
1292 * @sh_num: sh block to address
1293 *
1294 * Select which SE, SH combinations to address. Certain
1295 * registers are instanced per SE or SH. 0xffffffff means
1296 * broadcast to all SEs or SHs (CIK).
1297 */
1298static void cik_select_se_sh(struct radeon_device *rdev,
1299 u32 se_num, u32 sh_num)
1300{
1301 u32 data = INSTANCE_BROADCAST_WRITES;
1302
1303 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1304 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1305 else if (se_num == 0xffffffff)
1306 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1307 else if (sh_num == 0xffffffff)
1308 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1309 else
1310 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1311 WREG32(GRBM_GFX_INDEX, data);
1312}
1313
1314/**
1315 * cik_create_bitmask - create a bitmask
1316 *
1317 * @bit_width: length of the mask
1318 *
1319 * create a variable length bit mask (CIK).
1320 * Returns the bitmask.
1321 */
1322static u32 cik_create_bitmask(u32 bit_width)
1323{
1324 u32 i, mask = 0;
1325
1326 for (i = 0; i < bit_width; i++) {
1327 mask <<= 1;
1328 mask |= 1;
1329 }
1330 return mask;
1331}
1332
1333/**
1334 * cik_get_rb_disabled - compute the mask of disabled RBs
1335 *
1336 * @rdev: radeon_device pointer
1337 * @max_rb_num: max RBs (render backends) for the asic
1338 * @se_num: number of SEs (shader engines) for the asic
1339 * @sh_per_se: number of SH blocks per SE for the asic
1340 *
1341 * Calculates the bitmask of disabled RBs (CIK).
1342 * Returns the disabled RB bitmask.
1343 */
1344static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1345 u32 max_rb_num, u32 se_num,
1346 u32 sh_per_se)
1347{
1348 u32 data, mask;
1349
1350 data = RREG32(CC_RB_BACKEND_DISABLE);
1351 if (data & 1)
1352 data &= BACKEND_DISABLE_MASK;
1353 else
1354 data = 0;
1355 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1356
1357 data >>= BACKEND_DISABLE_SHIFT;
1358
1359 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1360
1361 return data & mask;
1362}
1363
1364/**
1365 * cik_setup_rb - setup the RBs on the asic
1366 *
1367 * @rdev: radeon_device pointer
1368 * @se_num: number of SEs (shader engines) for the asic
1369 * @sh_per_se: number of SH blocks per SE for the asic
1370 * @max_rb_num: max RBs (render backends) for the asic
1371 *
1372 * Configures per-SE/SH RB registers (CIK).
1373 */
1374static void cik_setup_rb(struct radeon_device *rdev,
1375 u32 se_num, u32 sh_per_se,
1376 u32 max_rb_num)
1377{
1378 int i, j;
1379 u32 data, mask;
1380 u32 disabled_rbs = 0;
1381 u32 enabled_rbs = 0;
1382
1383 for (i = 0; i < se_num; i++) {
1384 for (j = 0; j < sh_per_se; j++) {
1385 cik_select_se_sh(rdev, i, j);
1386 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1387 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1388 }
1389 }
1390 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1391
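	/* invert the per-SH disabled bitmap into a mask of enabled RBs */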
1392 mask = 1;
1393 for (i = 0; i < max_rb_num; i++) {
1394 if (!(disabled_rbs & mask))
1395 enabled_rbs |= mask;
1396 mask <<= 1;
1397 }
1398
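	/* program the raster config for each SE from the enabled RB mask */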
1399 for (i = 0; i < se_num; i++) {
1400 cik_select_se_sh(rdev, i, 0xffffffff);
1401 data = 0;
1402 for (j = 0; j < sh_per_se; j++) {
1403 switch (enabled_rbs & 3) {
1404 case 1:
1405 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1406 break;
1407 case 2:
1408 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1409 break;
1410 case 3:
1411 default:
1412 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1413 break;
1414 }
1415 enabled_rbs >>= 2;
1416 }
1417 WREG32(PA_SC_RASTER_CONFIG, data);
1418 }
1419 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1420}
1421
1422/**
1423 * cik_gpu_init - setup the 3D engine
1424 *
1425 * @rdev: radeon_device pointer
1426 *
1427 * Configures the 3D engine and tiling configuration
1428 * registers so that the 3D engine is usable.
1429 */
1430static void cik_gpu_init(struct radeon_device *rdev)
1431{
1432 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1433 u32 mc_shared_chmap, mc_arb_ramcfg;
1434 u32 hdp_host_path_cntl;
1435 u32 tmp;
1436 int i, j;
1437
1438 switch (rdev->family) {
1439 case CHIP_BONAIRE:
1440 rdev->config.cik.max_shader_engines = 2;
1441 rdev->config.cik.max_tile_pipes = 4;
1442 rdev->config.cik.max_cu_per_sh = 7;
1443 rdev->config.cik.max_sh_per_se = 1;
1444 rdev->config.cik.max_backends_per_se = 2;
1445 rdev->config.cik.max_texture_channel_caches = 4;
1446 rdev->config.cik.max_gprs = 256;
1447 rdev->config.cik.max_gs_threads = 32;
1448 rdev->config.cik.max_hw_contexts = 8;
1449
1450 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1451 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1452 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1453 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1454 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1455 break;
1456 case CHIP_KAVERI:
1457 /* TODO */
1458 break;
1459 case CHIP_KABINI:
1460 default:
1461 rdev->config.cik.max_shader_engines = 1;
1462 rdev->config.cik.max_tile_pipes = 2;
1463 rdev->config.cik.max_cu_per_sh = 2;
1464 rdev->config.cik.max_sh_per_se = 1;
1465 rdev->config.cik.max_backends_per_se = 1;
1466 rdev->config.cik.max_texture_channel_caches = 2;
1467 rdev->config.cik.max_gprs = 256;
1468 rdev->config.cik.max_gs_threads = 16;
1469 rdev->config.cik.max_hw_contexts = 8;
1470
1471 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1472 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1473 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1474 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1475 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1476 break;
1477 }
1478
1479 /* Initialize HDP */
1480 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1481 WREG32((0x2c14 + j), 0x00000000);
1482 WREG32((0x2c18 + j), 0x00000000);
1483 WREG32((0x2c1c + j), 0x00000000);
1484 WREG32((0x2c20 + j), 0x00000000);
1485 WREG32((0x2c24 + j), 0x00000000);
1486 }
1487
1488 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1489
1490 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1491
1492 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1493 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1494
1495 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1496 rdev->config.cik.mem_max_burst_length_bytes = 256;
1497 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1498 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1499 if (rdev->config.cik.mem_row_size_in_kb > 4)
1500 rdev->config.cik.mem_row_size_in_kb = 4;
1501 /* XXX use MC settings? */
1502 rdev->config.cik.shader_engine_tile_size = 32;
1503 rdev->config.cik.num_gpus = 1;
1504 rdev->config.cik.multi_gpu_tile_size = 64;
1505
1506 /* fix up row size */
1507 gb_addr_config &= ~ROW_SIZE_MASK;
1508 switch (rdev->config.cik.mem_row_size_in_kb) {
1509 case 1:
1510 default:
1511 gb_addr_config |= ROW_SIZE(0);
1512 break;
1513 case 2:
1514 gb_addr_config |= ROW_SIZE(1);
1515 break;
1516 case 4:
1517 gb_addr_config |= ROW_SIZE(2);
1518 break;
1519 }
1520
1521 /* setup tiling info dword. gb_addr_config is not adequate since it does
1522 * not have bank info, so create a custom tiling dword.
1523 * bits 3:0 num_pipes
1524 * bits 7:4 num_banks
1525 * bits 11:8 group_size
1526 * bits 15:12 row_size
1527 */
1528 rdev->config.cik.tile_config = 0;
1529 switch (rdev->config.cik.num_tile_pipes) {
1530 case 1:
1531 rdev->config.cik.tile_config |= (0 << 0);
1532 break;
1533 case 2:
1534 rdev->config.cik.tile_config |= (1 << 0);
1535 break;
1536 case 4:
1537 rdev->config.cik.tile_config |= (2 << 0);
1538 break;
1539 case 8:
1540 default:
1541 /* XXX what about 12? */
1542 rdev->config.cik.tile_config |= (3 << 0);
1543 break;
1544 }
1545 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1546 rdev->config.cik.tile_config |= 1 << 4;
1547 else
1548 rdev->config.cik.tile_config |= 0 << 4;
1549 rdev->config.cik.tile_config |=
1550 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1551 rdev->config.cik.tile_config |=
1552 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1553
1554 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1555 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1556 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1557 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1558 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1559 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1560 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1561 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1562
1563 cik_tiling_mode_table_init(rdev);
1564
1565 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1566 rdev->config.cik.max_sh_per_se,
1567 rdev->config.cik.max_backends_per_se);
1568
1569 /* set HW defaults for 3D engine */
1570 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1571
1572 WREG32(SX_DEBUG_1, 0x20);
1573
1574 WREG32(TA_CNTL_AUX, 0x00010000);
1575
1576 tmp = RREG32(SPI_CONFIG_CNTL);
1577 tmp |= 0x03000000;
1578 WREG32(SPI_CONFIG_CNTL, tmp);
1579
1580 WREG32(SQ_CONFIG, 1);
1581
1582 WREG32(DB_DEBUG, 0);
1583
1584 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1585 tmp |= 0x00000400;
1586 WREG32(DB_DEBUG2, tmp);
1587
1588 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1589 tmp |= 0x00020200;
1590 WREG32(DB_DEBUG3, tmp);
1591
1592 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1593 tmp |= 0x00018208;
1594 WREG32(CB_HW_CONTROL, tmp);
1595
1596 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1597
1598 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1599 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1600 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1601 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1602
1603 WREG32(VGT_NUM_INSTANCES, 1);
1604
1605 WREG32(CP_PERFMON_CNTL, 0);
1606
1607 WREG32(SQ_CONFIG, 0);
1608
1609 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1610 FORCE_EOV_MAX_REZ_CNT(255)));
1611
1612 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1613 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1614
1615 WREG32(VGT_GS_VERTEX_REUSE, 16);
1616 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1617
1618 tmp = RREG32(HDP_MISC_CNTL);
1619 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1620 WREG32(HDP_MISC_CNTL, tmp);
1621
1622 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1623 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1624
1625 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1626 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1627
1628 udelay(50);
1629}
1630
1631/*
1632 * GPU scratch register helper functions.
1633 */
1634/**
1635 * cik_scratch_init - setup driver info for CP scratch regs
1636 *
1637 * @rdev: radeon_device pointer
1638 *
1639 * Set up the number and offset of the CP scratch registers.
1640 * NOTE: use of CP scratch registers is a legacy interface and
1641 * is not used by default on newer asics (r6xx+). On newer asics,
1642 * memory buffers are used for fences rather than scratch regs.
1643 */
1644static void cik_scratch_init(struct radeon_device *rdev)
1645{
1646 int i;
1647
1648 rdev->scratch.num_reg = 7;
1649 rdev->scratch.reg_base = SCRATCH_REG0;
1650 for (i = 0; i < rdev->scratch.num_reg; i++) {
1651 rdev->scratch.free[i] = true;
1652 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1653 }
1654}
1655
1656/**
1657 * cik_ring_test - basic gfx ring test
1658 *
1659 * @rdev: radeon_device pointer
1660 * @ring: radeon_ring structure holding ring information
1661 *
1662 * Allocate a scratch register and write to it using the gfx ring (CIK).
1663 * Provides a basic gfx ring test to verify that the ring is working.
1664 * Used by cik_cp_gfx_resume();
1665 * Returns 0 on success, error on failure.
1666 */
1667int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1668{
1669 uint32_t scratch;
1670 uint32_t tmp = 0;
1671 unsigned i;
1672 int r;
1673
1674 r = radeon_scratch_get(rdev, &scratch);
1675 if (r) {
1676 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1677 return r;
1678 }
1679 WREG32(scratch, 0xCAFEDEAD);
1680 r = radeon_ring_lock(rdev, ring, 3);
1681 if (r) {
1682 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1683 radeon_scratch_free(rdev, scratch);
1684 return r;
1685 }
1686 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1687 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1688 radeon_ring_write(ring, 0xDEADBEEF);
1689 radeon_ring_unlock_commit(rdev, ring);
Alex Deucher963e81f2013-06-26 17:37:11 -04001690
Alex Deucherfbc832c2012-07-20 14:41:35 -04001691 for (i = 0; i < rdev->usec_timeout; i++) {
1692 tmp = RREG32(scratch);
1693 if (tmp == 0xDEADBEEF)
1694 break;
1695 DRM_UDELAY(1);
1696 }
1697 if (i < rdev->usec_timeout) {
1698 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1699 } else {
1700 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1701 ring->idx, scratch, tmp);
1702 r = -EINVAL;
1703 }
1704 radeon_scratch_free(rdev, scratch);
1705 return r;
1706}
1707
1708/**
Alex Deucherb07fdd32013-04-11 09:36:17 -04001709 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001710 *
1711 * @rdev: radeon_device pointer
1712 * @fence: radeon fence object
1713 *
 1714 * Emits a fence sequence number on the gfx ring and flushes
1715 * GPU caches.
1716 */
Alex Deucherb07fdd32013-04-11 09:36:17 -04001717void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
1718 struct radeon_fence *fence)
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001719{
1720 struct radeon_ring *ring = &rdev->ring[fence->ring];
1721 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1722
1723 /* EVENT_WRITE_EOP - flush caches, send int */
1724 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1725 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1726 EOP_TC_ACTION_EN |
1727 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1728 EVENT_INDEX(5)));
1729 radeon_ring_write(ring, addr & 0xfffffffc);
1730 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1731 radeon_ring_write(ring, fence->seq);
1732 radeon_ring_write(ring, 0);
1733 /* HDP flush */
1734 /* We should be using the new WAIT_REG_MEM special op packet here
1735 * but it causes the CP to hang
1736 */
1737 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1738 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1739 WRITE_DATA_DST_SEL(0)));
1740 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1741 radeon_ring_write(ring, 0);
1742 radeon_ring_write(ring, 0);
1743}
1744
Alex Deucherb07fdd32013-04-11 09:36:17 -04001745/**
1746 * cik_fence_compute_ring_emit - emit a fence on the compute ring
1747 *
1748 * @rdev: radeon_device pointer
1749 * @fence: radeon fence object
1750 *
 1751 * Emits a fence sequence number on the compute ring and flushes
1752 * GPU caches.
1753 */
1754void cik_fence_compute_ring_emit(struct radeon_device *rdev,
1755 struct radeon_fence *fence)
1756{
1757 struct radeon_ring *ring = &rdev->ring[fence->ring];
1758 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1759
1760 /* RELEASE_MEM - flush caches, send int */
1761 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
1762 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1763 EOP_TC_ACTION_EN |
1764 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1765 EVENT_INDEX(5)));
1766 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
1767 radeon_ring_write(ring, addr & 0xfffffffc);
1768 radeon_ring_write(ring, upper_32_bits(addr));
1769 radeon_ring_write(ring, fence->seq);
1770 radeon_ring_write(ring, 0);
1771 /* HDP flush */
1772 /* We should be using the new WAIT_REG_MEM special op packet here
1773 * but it causes the CP to hang
1774 */
1775 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1776 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1777 WRITE_DATA_DST_SEL(0)));
1778 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1779 radeon_ring_write(ring, 0);
1780 radeon_ring_write(ring, 0);
1781}
1782
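/**
 * cik_semaphore_ring_emit - emit a semaphore on the gfx/compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a MEM_SEMAPHORE packet to the ring to wait on or signal
 * other rings (CIK).
 */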
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001783void cik_semaphore_ring_emit(struct radeon_device *rdev,
1784 struct radeon_ring *ring,
1785 struct radeon_semaphore *semaphore,
1786 bool emit_wait)
1787{
1788 uint64_t addr = semaphore->gpu_addr;
1789 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1790
1791 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1792 radeon_ring_write(ring, addr & 0xffffffff);
1793 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1794}
1795
1796/*
1797 * IB stuff
1798 */
1799/**
1800 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1801 *
1802 * @rdev: radeon_device pointer
1803 * @ib: radeon indirect buffer object
1804 *
 1805 * Emits a DE (drawing engine) or CE (constant engine) IB
1806 * on the gfx ring. IBs are usually generated by userspace
1807 * acceleration drivers and submitted to the kernel for
 1808 * scheduling on the ring. This function schedules the IB
1809 * on the gfx ring for execution by the GPU.
1810 */
1811void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1812{
1813 struct radeon_ring *ring = &rdev->ring[ib->ring];
1814 u32 header, control = INDIRECT_BUFFER_VALID;
1815
1816 if (ib->is_const_ib) {
1817 /* set switch buffer packet before const IB */
1818 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1819 radeon_ring_write(ring, 0);
1820
1821 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1822 } else {
1823 u32 next_rptr;
1824 if (ring->rptr_save_reg) {
1825 next_rptr = ring->wptr + 3 + 4;
1826 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1827 radeon_ring_write(ring, ((ring->rptr_save_reg -
1828 PACKET3_SET_UCONFIG_REG_START) >> 2));
1829 radeon_ring_write(ring, next_rptr);
1830 } else if (rdev->wb.enabled) {
1831 next_rptr = ring->wptr + 5 + 4;
1832 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1833 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1834 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1835 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1836 radeon_ring_write(ring, next_rptr);
1837 }
1838
1839 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1840 }
1841
1842 control |= ib->length_dw |
1843 (ib->vm ? (ib->vm->id << 24) : 0);
1844
1845 radeon_ring_write(ring, header);
1846 radeon_ring_write(ring,
1847#ifdef __BIG_ENDIAN
1848 (2 << 0) |
1849#endif
1850 (ib->gpu_addr & 0xFFFFFFFC));
1851 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1852 radeon_ring_write(ring, control);
1853}
1854
Alex Deucherfbc832c2012-07-20 14:41:35 -04001855/**
1856 * cik_ib_test - basic gfx ring IB test
1857 *
1858 * @rdev: radeon_device pointer
1859 * @ring: radeon_ring structure holding ring information
1860 *
1861 * Allocate an IB and execute it on the gfx ring (CIK).
1862 * Provides a basic gfx ring test to verify that IBs are working.
1863 * Returns 0 on success, error on failure.
1864 */
1865int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1866{
1867 struct radeon_ib ib;
1868 uint32_t scratch;
1869 uint32_t tmp = 0;
1870 unsigned i;
1871 int r;
1872
1873 r = radeon_scratch_get(rdev, &scratch);
1874 if (r) {
1875 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1876 return r;
1877 }
1878 WREG32(scratch, 0xCAFEDEAD);
1879 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1880 if (r) {
1881 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
 1882 		return r;
1883 }
1884 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1885 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1886 ib.ptr[2] = 0xDEADBEEF;
1887 ib.length_dw = 3;
1888 r = radeon_ib_schedule(rdev, &ib, NULL);
1889 if (r) {
1890 radeon_scratch_free(rdev, scratch);
1891 radeon_ib_free(rdev, &ib);
1892 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1893 return r;
1894 }
1895 r = radeon_fence_wait(ib.fence, false);
1896 if (r) {
1897 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
 1898 		return r;
1899 }
1900 for (i = 0; i < rdev->usec_timeout; i++) {
1901 tmp = RREG32(scratch);
1902 if (tmp == 0xDEADBEEF)
1903 break;
1904 DRM_UDELAY(1);
1905 }
1906 if (i < rdev->usec_timeout) {
1907 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1908 } else {
1909 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1910 scratch, tmp);
1911 r = -EINVAL;
1912 }
1913 radeon_scratch_free(rdev, scratch);
1914 radeon_ib_free(rdev, &ib);
1915 return r;
1916}
1917
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001918/*
Alex Deucher841cf442012-12-18 21:47:44 -05001919 * CP.
 1920 * On CIK, gfx and compute now have independent command processors.
1921 *
1922 * GFX
1923 * Gfx consists of a single ring and can process both gfx jobs and
1924 * compute jobs. The gfx CP consists of three microengines (ME):
1925 * PFP - Pre-Fetch Parser
1926 * ME - Micro Engine
1927 * CE - Constant Engine
1928 * The PFP and ME make up what is considered the Drawing Engine (DE).
 1929 * The CE is an asynchronous engine used for updating buffer descriptors
1930 * used by the DE so that they can be loaded into cache in parallel
1931 * while the DE is processing state update packets.
1932 *
1933 * Compute
1934 * The compute CP consists of two microengines (ME):
1935 * MEC1 - Compute MicroEngine 1
1936 * MEC2 - Compute MicroEngine 2
1937 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1938 * The queues are exposed to userspace and are programmed directly
1939 * by the compute runtime.
1940 */
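/*
 * Illustrative sketch only (not driver code): one way to decompose a flat
 * compute queue index into the (me, pipe, queue) triple implied by the
 * topology above (up to 2 MECs x 4 pipes/MEC x 8 queues/pipe).  The helper
 * name and the flat numbering are assumptions made for this example; the
 * 1-based MEC numbering matches what cik_srbm_select() expects.
 */
#if 0
static void example_decode_compute_queue(u32 flat,
					 u32 *me, u32 *pipe, u32 *queue)
{
	*queue = flat % 8;		/* 8 queues per pipe */
	*pipe  = (flat / 8) % 4;	/* 4 pipes per MEC */
	*me    = (flat / 32) + 1;	/* MEC1 or MEC2 */
}
#endif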
1941/**
1942 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1943 *
1944 * @rdev: radeon_device pointer
1945 * @enable: enable or disable the MEs
1946 *
1947 * Halts or unhalts the gfx MEs.
1948 */
1949static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1950{
1951 if (enable)
1952 WREG32(CP_ME_CNTL, 0);
1953 else {
1954 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1955 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1956 }
1957 udelay(50);
1958}
1959
1960/**
1961 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1962 *
1963 * @rdev: radeon_device pointer
1964 *
1965 * Loads the gfx PFP, ME, and CE ucode.
1966 * Returns 0 for success, -EINVAL if the ucode is not available.
1967 */
1968static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1969{
1970 const __be32 *fw_data;
1971 int i;
1972
1973 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1974 return -EINVAL;
1975
1976 cik_cp_gfx_enable(rdev, false);
1977
1978 /* PFP */
1979 fw_data = (const __be32 *)rdev->pfp_fw->data;
1980 WREG32(CP_PFP_UCODE_ADDR, 0);
1981 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1982 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1983 WREG32(CP_PFP_UCODE_ADDR, 0);
1984
1985 /* CE */
1986 fw_data = (const __be32 *)rdev->ce_fw->data;
1987 WREG32(CP_CE_UCODE_ADDR, 0);
1988 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1989 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1990 WREG32(CP_CE_UCODE_ADDR, 0);
1991
1992 /* ME */
1993 fw_data = (const __be32 *)rdev->me_fw->data;
1994 WREG32(CP_ME_RAM_WADDR, 0);
1995 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1996 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1997 WREG32(CP_ME_RAM_WADDR, 0);
1998
1999 WREG32(CP_PFP_UCODE_ADDR, 0);
2000 WREG32(CP_CE_UCODE_ADDR, 0);
2001 WREG32(CP_ME_RAM_WADDR, 0);
2002 WREG32(CP_ME_RAM_RADDR, 0);
2003 return 0;
2004}
2005
2006/**
2007 * cik_cp_gfx_start - start the gfx ring
2008 *
2009 * @rdev: radeon_device pointer
2010 *
2011 * Enables the ring and loads the clear state context and other
2012 * packets required to init the ring.
2013 * Returns 0 for success, error for failure.
2014 */
2015static int cik_cp_gfx_start(struct radeon_device *rdev)
2016{
2017 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2018 int r, i;
2019
2020 /* init the CP */
2021 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2022 WREG32(CP_ENDIAN_SWAP, 0);
2023 WREG32(CP_DEVICE_ID, 1);
2024
2025 cik_cp_gfx_enable(rdev, true);
2026
2027 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2028 if (r) {
2029 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2030 return r;
2031 }
2032
2033 /* init the CE partitions. CE only used for gfx on CIK */
2034 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2035 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2036 radeon_ring_write(ring, 0xc000);
2037 radeon_ring_write(ring, 0xc000);
2038
2039 /* setup clear context state */
2040 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2041 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2042
2043 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2044 radeon_ring_write(ring, 0x80000000);
2045 radeon_ring_write(ring, 0x80000000);
2046
2047 for (i = 0; i < cik_default_size; i++)
2048 radeon_ring_write(ring, cik_default_state[i]);
2049
2050 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2051 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2052
2053 /* set clear context state */
2054 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2055 radeon_ring_write(ring, 0);
2056
2057 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2058 radeon_ring_write(ring, 0x00000316);
2059 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2060 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2061
2062 radeon_ring_unlock_commit(rdev, ring);
2063
2064 return 0;
2065}
2066
2067/**
2068 * cik_cp_gfx_fini - stop the gfx ring
2069 *
2070 * @rdev: radeon_device pointer
2071 *
2072 * Stop the gfx ring and tear down the driver ring
2073 * info.
2074 */
2075static void cik_cp_gfx_fini(struct radeon_device *rdev)
2076{
2077 cik_cp_gfx_enable(rdev, false);
2078 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2079}
2080
2081/**
2082 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2083 *
2084 * @rdev: radeon_device pointer
2085 *
2086 * Program the location and size of the gfx ring buffer
2087 * and test it to make sure it's working.
2088 * Returns 0 for success, error for failure.
2089 */
2090static int cik_cp_gfx_resume(struct radeon_device *rdev)
2091{
2092 struct radeon_ring *ring;
2093 u32 tmp;
2094 u32 rb_bufsz;
2095 u64 rb_addr;
2096 int r;
2097
2098 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2099 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2100
2101 /* Set the write pointer delay */
2102 WREG32(CP_RB_WPTR_DELAY, 0);
2103
2104 /* set the RB to use vmid 0 */
2105 WREG32(CP_RB_VMID, 0);
2106
2107 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2108
2109 /* ring 0 - compute and gfx */
2110 /* Set ring buffer size */
2111 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2112 rb_bufsz = drm_order(ring->ring_size / 8);
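	/* e.g. (illustration only): a 1 MB gfx ring gives
	 * rb_bufsz = drm_order(1048576 / 8) = 17 */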
2113 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2114#ifdef __BIG_ENDIAN
2115 tmp |= BUF_SWAP_32BIT;
2116#endif
2117 WREG32(CP_RB0_CNTL, tmp);
2118
2119 /* Initialize the ring buffer's read and write pointers */
2120 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2121 ring->wptr = 0;
2122 WREG32(CP_RB0_WPTR, ring->wptr);
2123
 2124 /* set the wb address whether it's enabled or not */
2125 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2126 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2127
2128 /* scratch register shadowing is no longer supported */
2129 WREG32(SCRATCH_UMSK, 0);
2130
2131 if (!rdev->wb.enabled)
2132 tmp |= RB_NO_UPDATE;
2133
2134 mdelay(1);
2135 WREG32(CP_RB0_CNTL, tmp);
2136
2137 rb_addr = ring->gpu_addr >> 8;
2138 WREG32(CP_RB0_BASE, rb_addr);
2139 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2140
2141 ring->rptr = RREG32(CP_RB0_RPTR);
2142
2143 /* start the ring */
2144 cik_cp_gfx_start(rdev);
2145 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2146 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2147 if (r) {
2148 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2149 return r;
2150 }
2151 return 0;
2152}
2153
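/*
 * Compute ring rptr/wptr helpers.
 * The compute queues report their pointers either through the writeback
 * buffer (when enabled) or through the CP_HQD_PQ_* registers selected via
 * cik_srbm_select(); wptr updates are published through the writeback
 * buffer and the queue's doorbell.
 */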
Alex Deucher963e81f2013-06-26 17:37:11 -04002154u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2155 struct radeon_ring *ring)
2156{
2157 u32 rptr;
 2158
2161 if (rdev->wb.enabled) {
2162 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2163 } else {
2164 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2165 rptr = RREG32(CP_HQD_PQ_RPTR);
2166 cik_srbm_select(rdev, 0, 0, 0, 0);
2167 }
2168 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2169
2170 return rptr;
2171}
2172
2173u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2174 struct radeon_ring *ring)
2175{
2176 u32 wptr;
2177
2178 if (rdev->wb.enabled) {
2179 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2180 } else {
2181 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2182 wptr = RREG32(CP_HQD_PQ_WPTR);
2183 cik_srbm_select(rdev, 0, 0, 0, 0);
2184 }
2185 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2186
2187 return wptr;
2188}
2189
2190void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2191 struct radeon_ring *ring)
2192{
2193 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2194
2195 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2196 WDOORBELL32(ring->doorbell_offset, wptr);
2197}
2198
Alex Deucher841cf442012-12-18 21:47:44 -05002199/**
2200 * cik_cp_compute_enable - enable/disable the compute CP MEs
2201 *
2202 * @rdev: radeon_device pointer
2203 * @enable: enable or disable the MEs
2204 *
2205 * Halts or unhalts the compute MEs.
2206 */
2207static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2208{
2209 if (enable)
2210 WREG32(CP_MEC_CNTL, 0);
2211 else
2212 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2213 udelay(50);
2214}
2215
2216/**
2217 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2218 *
2219 * @rdev: radeon_device pointer
2220 *
 2221 * Loads the compute MEC1 (and MEC2 on Kaveri) ucode.
2222 * Returns 0 for success, -EINVAL if the ucode is not available.
2223 */
2224static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2225{
2226 const __be32 *fw_data;
2227 int i;
2228
2229 if (!rdev->mec_fw)
2230 return -EINVAL;
2231
2232 cik_cp_compute_enable(rdev, false);
2233
2234 /* MEC1 */
2235 fw_data = (const __be32 *)rdev->mec_fw->data;
2236 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2237 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2238 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2239 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2240
2241 if (rdev->family == CHIP_KAVERI) {
2242 /* MEC2 */
2243 fw_data = (const __be32 *)rdev->mec_fw->data;
2244 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2245 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2246 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2247 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2248 }
2249
2250 return 0;
2251}
2252
2253/**
2254 * cik_cp_compute_start - start the compute queues
2255 *
2256 * @rdev: radeon_device pointer
2257 *
2258 * Enable the compute queues.
2259 * Returns 0 for success, error for failure.
2260 */
2261static int cik_cp_compute_start(struct radeon_device *rdev)
2262{
Alex Deucher963e81f2013-06-26 17:37:11 -04002263 cik_cp_compute_enable(rdev, true);
2264
Alex Deucher841cf442012-12-18 21:47:44 -05002265 return 0;
2266}
2267
2268/**
2269 * cik_cp_compute_fini - stop the compute queues
2270 *
2271 * @rdev: radeon_device pointer
2272 *
2273 * Stop the compute queues and tear down the driver queue
2274 * info.
2275 */
2276static void cik_cp_compute_fini(struct radeon_device *rdev)
2277{
Alex Deucher963e81f2013-06-26 17:37:11 -04002278 int i, idx, r;
2279
Alex Deucher841cf442012-12-18 21:47:44 -05002280 cik_cp_compute_enable(rdev, false);
Alex Deucher963e81f2013-06-26 17:37:11 -04002281
2282 for (i = 0; i < 2; i++) {
2283 if (i == 0)
2284 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2285 else
2286 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2287
2288 if (rdev->ring[idx].mqd_obj) {
2289 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2290 if (unlikely(r != 0))
2291 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2292
2293 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2294 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2295
2296 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2297 rdev->ring[idx].mqd_obj = NULL;
2298 }
2299 }
Alex Deucher841cf442012-12-18 21:47:44 -05002300}
2301
Alex Deucher963e81f2013-06-26 17:37:11 -04002302static void cik_mec_fini(struct radeon_device *rdev)
2303{
2304 int r;
2305
2306 if (rdev->mec.hpd_eop_obj) {
2307 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2308 if (unlikely(r != 0))
2309 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2310 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2311 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2312
2313 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2314 rdev->mec.hpd_eop_obj = NULL;
2315 }
2316}
2317
2318#define MEC_HPD_SIZE 2048
2319
2320static int cik_mec_init(struct radeon_device *rdev)
2321{
2322 int r;
2323 u32 *hpd;
2324
2325 /*
2326 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2327 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2328 */
2329 if (rdev->family == CHIP_KAVERI)
2330 rdev->mec.num_mec = 2;
2331 else
2332 rdev->mec.num_mec = 1;
2333 rdev->mec.num_pipe = 4;
2334 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2335
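	/* Worked example (illustration only): the EOP buffer allocated below
	 * is num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, i.e.
	 * 2 * 4 * 2048 * 2 = 32 KB on Kaveri and
	 * 1 * 4 * 2048 * 2 = 16 KB on Bonaire/Kabini.
	 */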
2336 if (rdev->mec.hpd_eop_obj == NULL) {
2337 r = radeon_bo_create(rdev,
 2338 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2339 PAGE_SIZE, true,
2340 RADEON_GEM_DOMAIN_GTT, NULL,
2341 &rdev->mec.hpd_eop_obj);
2342 if (r) {
 2343 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2344 return r;
2345 }
2346 }
2347
2348 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2349 if (unlikely(r != 0)) {
2350 cik_mec_fini(rdev);
2351 return r;
2352 }
2353 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2354 &rdev->mec.hpd_eop_gpu_addr);
2355 if (r) {
 2356 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2357 cik_mec_fini(rdev);
2358 return r;
2359 }
2360 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2361 if (r) {
 2362 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2363 cik_mec_fini(rdev);
2364 return r;
2365 }
2366
2367 /* clear memory. Not sure if this is required or not */
 2368 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2369
2370 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2371 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2372
2373 return 0;
2374}
2375
2376struct hqd_registers
2377{
2378 u32 cp_mqd_base_addr;
2379 u32 cp_mqd_base_addr_hi;
2380 u32 cp_hqd_active;
2381 u32 cp_hqd_vmid;
2382 u32 cp_hqd_persistent_state;
2383 u32 cp_hqd_pipe_priority;
2384 u32 cp_hqd_queue_priority;
2385 u32 cp_hqd_quantum;
2386 u32 cp_hqd_pq_base;
2387 u32 cp_hqd_pq_base_hi;
2388 u32 cp_hqd_pq_rptr;
2389 u32 cp_hqd_pq_rptr_report_addr;
2390 u32 cp_hqd_pq_rptr_report_addr_hi;
2391 u32 cp_hqd_pq_wptr_poll_addr;
2392 u32 cp_hqd_pq_wptr_poll_addr_hi;
2393 u32 cp_hqd_pq_doorbell_control;
2394 u32 cp_hqd_pq_wptr;
2395 u32 cp_hqd_pq_control;
2396 u32 cp_hqd_ib_base_addr;
2397 u32 cp_hqd_ib_base_addr_hi;
2398 u32 cp_hqd_ib_rptr;
2399 u32 cp_hqd_ib_control;
2400 u32 cp_hqd_iq_timer;
2401 u32 cp_hqd_iq_rptr;
2402 u32 cp_hqd_dequeue_request;
2403 u32 cp_hqd_dma_offload;
2404 u32 cp_hqd_sema_cmd;
2405 u32 cp_hqd_msg_type;
2406 u32 cp_hqd_atomic0_preop_lo;
2407 u32 cp_hqd_atomic0_preop_hi;
2408 u32 cp_hqd_atomic1_preop_lo;
2409 u32 cp_hqd_atomic1_preop_hi;
2410 u32 cp_hqd_hq_scheduler0;
2411 u32 cp_hqd_hq_scheduler1;
2412 u32 cp_mqd_control;
2413};
2414
2415struct bonaire_mqd
2416{
2417 u32 header;
2418 u32 dispatch_initiator;
2419 u32 dimensions[3];
2420 u32 start_idx[3];
2421 u32 num_threads[3];
2422 u32 pipeline_stat_enable;
2423 u32 perf_counter_enable;
2424 u32 pgm[2];
2425 u32 tba[2];
2426 u32 tma[2];
2427 u32 pgm_rsrc[2];
2428 u32 vmid;
2429 u32 resource_limits;
2430 u32 static_thread_mgmt01[2];
2431 u32 tmp_ring_size;
2432 u32 static_thread_mgmt23[2];
2433 u32 restart[3];
2434 u32 thread_trace_enable;
2435 u32 reserved1;
2436 u32 user_data[16];
2437 u32 vgtcs_invoke_count[2];
2438 struct hqd_registers queue_state;
2439 u32 dequeue_cntr;
2440 u32 interrupt_queue[64];
2441};
2442
Alex Deucher841cf442012-12-18 21:47:44 -05002443/**
2444 * cik_cp_compute_resume - setup the compute queue registers
2445 *
2446 * @rdev: radeon_device pointer
2447 *
2448 * Program the compute queues and test them to make sure they
2449 * are working.
2450 * Returns 0 for success, error for failure.
2451 */
2452static int cik_cp_compute_resume(struct radeon_device *rdev)
2453{
Alex Deucher963e81f2013-06-26 17:37:11 -04002454 int r, i, j, idx;
2455 u32 tmp;
2456 bool use_doorbell = true;
2457 u64 hqd_gpu_addr;
2458 u64 mqd_gpu_addr;
2459 u64 eop_gpu_addr;
2460 u64 wb_gpu_addr;
2461 u32 *buf;
2462 struct bonaire_mqd *mqd;
Alex Deucher841cf442012-12-18 21:47:44 -05002463
Alex Deucher841cf442012-12-18 21:47:44 -05002464 r = cik_cp_compute_start(rdev);
2465 if (r)
2466 return r;
Alex Deucher963e81f2013-06-26 17:37:11 -04002467
2468 /* fix up chicken bits */
2469 tmp = RREG32(CP_CPF_DEBUG);
2470 tmp |= (1 << 23);
2471 WREG32(CP_CPF_DEBUG, tmp);
2472
2473 /* init the pipes */
2474 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2475 int me = (i < 4) ? 1 : 2;
2476 int pipe = (i < 4) ? i : (i - 4);
2477
2478 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2479
2480 cik_srbm_select(rdev, me, pipe, 0, 0);
2481
2482 /* write the EOP addr */
2483 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2484 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2485
2486 /* set the VMID assigned */
2487 WREG32(CP_HPD_EOP_VMID, 0);
2488
2489 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
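		/* e.g. with MEC_HPD_SIZE = 2048 bytes: drm_order(2048 / 8) = 8,
		 * and 2^(8+1) = 512 dwords = 2048 bytes (illustration only) */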
2490 tmp = RREG32(CP_HPD_EOP_CONTROL);
2491 tmp &= ~EOP_SIZE_MASK;
2492 tmp |= drm_order(MEC_HPD_SIZE / 8);
2493 WREG32(CP_HPD_EOP_CONTROL, tmp);
2494 }
2495 cik_srbm_select(rdev, 0, 0, 0, 0);
2496
2497 /* init the queues. Just two for now. */
2498 for (i = 0; i < 2; i++) {
2499 if (i == 0)
2500 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2501 else
2502 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2503
2504 if (rdev->ring[idx].mqd_obj == NULL) {
2505 r = radeon_bo_create(rdev,
2506 sizeof(struct bonaire_mqd),
2507 PAGE_SIZE, true,
2508 RADEON_GEM_DOMAIN_GTT, NULL,
2509 &rdev->ring[idx].mqd_obj);
2510 if (r) {
2511 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2512 return r;
2513 }
2514 }
2515
2516 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2517 if (unlikely(r != 0)) {
2518 cik_cp_compute_fini(rdev);
2519 return r;
2520 }
2521 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2522 &mqd_gpu_addr);
2523 if (r) {
2524 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2525 cik_cp_compute_fini(rdev);
2526 return r;
2527 }
2528 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2529 if (r) {
2530 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2531 cik_cp_compute_fini(rdev);
2532 return r;
2533 }
2534
2535 /* doorbell offset */
2536 rdev->ring[idx].doorbell_offset =
2537 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2538
2539 /* init the mqd struct */
2540 memset(buf, 0, sizeof(struct bonaire_mqd));
2541
2542 mqd = (struct bonaire_mqd *)buf;
2543 mqd->header = 0xC0310800;
2544 mqd->static_thread_mgmt01[0] = 0xffffffff;
2545 mqd->static_thread_mgmt01[1] = 0xffffffff;
2546 mqd->static_thread_mgmt23[0] = 0xffffffff;
2547 mqd->static_thread_mgmt23[1] = 0xffffffff;
2548
2549 cik_srbm_select(rdev, rdev->ring[idx].me,
2550 rdev->ring[idx].pipe,
2551 rdev->ring[idx].queue, 0);
2552
2553 /* disable wptr polling */
2554 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2555 tmp &= ~WPTR_POLL_EN;
2556 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2557
2558 /* enable doorbell? */
2559 mqd->queue_state.cp_hqd_pq_doorbell_control =
2560 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2561 if (use_doorbell)
2562 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2563 else
2564 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2565 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2566 mqd->queue_state.cp_hqd_pq_doorbell_control);
2567
2568 /* disable the queue if it's active */
2569 mqd->queue_state.cp_hqd_dequeue_request = 0;
2570 mqd->queue_state.cp_hqd_pq_rptr = 0;
 2571 mqd->queue_state.cp_hqd_pq_wptr = 0;
2572 if (RREG32(CP_HQD_ACTIVE) & 1) {
2573 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
 2574 for (j = 0; j < rdev->usec_timeout; j++) {
2575 if (!(RREG32(CP_HQD_ACTIVE) & 1))
2576 break;
2577 udelay(1);
2578 }
2579 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
2580 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
2581 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
2582 }
2583
2584 /* set the pointer to the MQD */
2585 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
2586 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
2587 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
2588 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
2589 /* set MQD vmid to 0 */
2590 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
2591 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
2592 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
2593
 2594 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2595 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
2596 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
2597 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2598 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
2599 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
2600
2601 /* set up the HQD, this is similar to CP_RB0_CNTL */
2602 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
2603 mqd->queue_state.cp_hqd_pq_control &=
2604 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
2605
2606 mqd->queue_state.cp_hqd_pq_control |=
2607 drm_order(rdev->ring[idx].ring_size / 8);
2608 mqd->queue_state.cp_hqd_pq_control |=
2609 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
2610#ifdef __BIG_ENDIAN
2611 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
2612#endif
2613 mqd->queue_state.cp_hqd_pq_control &=
2614 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
2615 mqd->queue_state.cp_hqd_pq_control |=
2616 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
2617 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
2618
2619 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
2620 if (i == 0)
2621 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
2622 else
2623 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
2624 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
2625 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2626 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
2627 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
2628 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
2629
 2630 /* set the wb address whether it's enabled or not */
2631 if (i == 0)
2632 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
2633 else
2634 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
2635 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
2636 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
2637 upper_32_bits(wb_gpu_addr) & 0xffff;
2638 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
2639 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
2640 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
2641 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
2642
2643 /* enable the doorbell if requested */
2644 if (use_doorbell) {
2645 mqd->queue_state.cp_hqd_pq_doorbell_control =
2646 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2647 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
2648 mqd->queue_state.cp_hqd_pq_doorbell_control |=
2649 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
2650 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2651 mqd->queue_state.cp_hqd_pq_doorbell_control &=
2652 ~(DOORBELL_SOURCE | DOORBELL_HIT);
2653
2654 } else {
2655 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
2656 }
2657 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2658 mqd->queue_state.cp_hqd_pq_doorbell_control);
2659
2660 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2661 rdev->ring[idx].wptr = 0;
2662 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
2663 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
2664 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
2665 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
2666
2667 /* set the vmid for the queue */
2668 mqd->queue_state.cp_hqd_vmid = 0;
2669 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
2670
2671 /* activate the queue */
2672 mqd->queue_state.cp_hqd_active = 1;
2673 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
2674
2675 cik_srbm_select(rdev, 0, 0, 0, 0);
2676
2677 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
2678 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2679
2680 rdev->ring[idx].ready = true;
2681 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
2682 if (r)
2683 rdev->ring[idx].ready = false;
2684 }
2685
Alex Deucher841cf442012-12-18 21:47:44 -05002686 return 0;
2687}
2688
Alex Deucher841cf442012-12-18 21:47:44 -05002689static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2690{
2691 cik_cp_gfx_enable(rdev, enable);
2692 cik_cp_compute_enable(rdev, enable);
2693}
2694
Alex Deucher841cf442012-12-18 21:47:44 -05002695static int cik_cp_load_microcode(struct radeon_device *rdev)
2696{
2697 int r;
2698
2699 r = cik_cp_gfx_load_microcode(rdev);
2700 if (r)
2701 return r;
2702 r = cik_cp_compute_load_microcode(rdev);
2703 if (r)
2704 return r;
2705
2706 return 0;
2707}
2708
Alex Deucher841cf442012-12-18 21:47:44 -05002709static void cik_cp_fini(struct radeon_device *rdev)
2710{
2711 cik_cp_gfx_fini(rdev);
2712 cik_cp_compute_fini(rdev);
2713}
2714
Alex Deucher841cf442012-12-18 21:47:44 -05002715static int cik_cp_resume(struct radeon_device *rdev)
2716{
2717 int r;
2718
2719 /* Reset all cp blocks */
2720 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2721 RREG32(GRBM_SOFT_RESET);
2722 mdelay(15);
2723 WREG32(GRBM_SOFT_RESET, 0);
2724 RREG32(GRBM_SOFT_RESET);
2725
2726 r = cik_cp_load_microcode(rdev);
2727 if (r)
2728 return r;
2729
2730 r = cik_cp_gfx_resume(rdev);
2731 if (r)
2732 return r;
2733 r = cik_cp_compute_resume(rdev);
2734 if (r)
2735 return r;
2736
2737 return 0;
2738}
2739
Alex Deucher21a93e12013-04-09 12:47:11 -04002740/*
2741 * sDMA - System DMA
2742 * Starting with CIK, the GPU has new asynchronous
2743 * DMA engines. These engines are used for compute
2744 * and gfx. There are two DMA engines (SDMA0, SDMA1)
2745 * and each one supports 1 ring buffer used for gfx
2746 * and 2 queues used for compute.
2747 *
2748 * The programming model is very similar to the CP
 2749 * (ring buffer, IBs, etc.), but sDMA has its own
2750 * packet format that is different from the PM4 format
2751 * used by the CP. sDMA supports copying data, writing
2752 * embedded data, solid fills, and a number of other
2753 * things. It also has support for tiling/detiling of
2754 * buffers.
2755 */
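/*
 * Illustrative sketch only (not driver code): a minimal sDMA "write
 * embedded data" sequence, mirroring the packet layout that
 * cik_sdma_ring_test() uses below.  The helper name is an assumption made
 * for this example; the ring is assumed to already be locked by the caller.
 */
#if 0
static void example_sdma_write_dword(struct radeon_ring *ring,
				     u64 dst_addr, u32 value)
{
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
					    SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, dst_addr & 0xfffffffc);		/* dst address lo */
	radeon_ring_write(ring, upper_32_bits(dst_addr));	/* dst address hi */
	radeon_ring_write(ring, 1);				/* number of DWs to follow */
	radeon_ring_write(ring, value);				/* the embedded data */
}
#endif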
2756/**
2757 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2758 *
2759 * @rdev: radeon_device pointer
2760 * @ib: IB object to schedule
2761 *
2762 * Schedule an IB in the DMA ring (CIK).
2763 */
2764void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2765 struct radeon_ib *ib)
2766{
2767 struct radeon_ring *ring = &rdev->ring[ib->ring];
2768 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2769
2770 if (rdev->wb.enabled) {
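		/* next_rptr = the rptr value once the 5 dword write packet,
		 * any NOP padding and the 4 dword INDIRECT_BUFFER packet
		 * below have been fetched */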
2771 u32 next_rptr = ring->wptr + 5;
2772 while ((next_rptr & 7) != 4)
2773 next_rptr++;
2774 next_rptr += 4;
2775 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2776 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2777 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2778 radeon_ring_write(ring, 1); /* number of DWs to follow */
2779 radeon_ring_write(ring, next_rptr);
2780 }
2781
 2782 /* IB packet must end on an 8 DW boundary */
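	/* e.g. if wptr % 8 == 6, six NOPs are emitted so that the 4 dword
	 * INDIRECT_BUFFER packet starts at offset 4 (mod 8) and ends on an
	 * 8 dword boundary (illustration only) */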
2783 while ((ring->wptr & 7) != 4)
2784 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2785 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2786 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2787 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2788 radeon_ring_write(ring, ib->length_dw);
2789
2790}
2791
2792/**
2793 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2794 *
2795 * @rdev: radeon_device pointer
2796 * @fence: radeon fence object
2797 *
2798 * Add a DMA fence packet to the ring to write
2799 * the fence seq number and DMA trap packet to generate
2800 * an interrupt if needed (CIK).
2801 */
2802void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2803 struct radeon_fence *fence)
2804{
2805 struct radeon_ring *ring = &rdev->ring[fence->ring];
2806 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2807 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2808 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2809 u32 ref_and_mask;
2810
2811 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2812 ref_and_mask = SDMA0;
2813 else
2814 ref_and_mask = SDMA1;
2815
2816 /* write the fence */
2817 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2818 radeon_ring_write(ring, addr & 0xffffffff);
2819 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2820 radeon_ring_write(ring, fence->seq);
2821 /* generate an interrupt */
2822 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2823 /* flush HDP */
2824 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2825 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2826 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2827 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2828 radeon_ring_write(ring, ref_and_mask); /* MASK */
2829 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2830}
2831
2832/**
2833 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2834 *
2835 * @rdev: radeon_device pointer
2836 * @ring: radeon_ring structure holding ring information
2837 * @semaphore: radeon semaphore object
2838 * @emit_wait: wait or signal semaphore
2839 *
2840 * Add a DMA semaphore packet to the ring wait on or signal
2841 * other rings (CIK).
2842 */
2843void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2844 struct radeon_ring *ring,
2845 struct radeon_semaphore *semaphore,
2846 bool emit_wait)
2847{
2848 u64 addr = semaphore->gpu_addr;
2849 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2850
2851 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2852 radeon_ring_write(ring, addr & 0xfffffff8);
2853 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2854}
2855
2856/**
2857 * cik_sdma_gfx_stop - stop the gfx async dma engines
2858 *
2859 * @rdev: radeon_device pointer
2860 *
2861 * Stop the gfx async dma ring buffers (CIK).
2862 */
2863static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2864{
2865 u32 rb_cntl, reg_offset;
2866 int i;
2867
2868 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2869
2870 for (i = 0; i < 2; i++) {
2871 if (i == 0)
2872 reg_offset = SDMA0_REGISTER_OFFSET;
2873 else
2874 reg_offset = SDMA1_REGISTER_OFFSET;
2875 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2876 rb_cntl &= ~SDMA_RB_ENABLE;
2877 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2878 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2879 }
2880}
2881
2882/**
2883 * cik_sdma_rlc_stop - stop the compute async dma engines
2884 *
2885 * @rdev: radeon_device pointer
2886 *
2887 * Stop the compute async dma queues (CIK).
2888 */
2889static void cik_sdma_rlc_stop(struct radeon_device *rdev)
2890{
2891 /* XXX todo */
2892}
2893
2894/**
 2895 * cik_sdma_enable - enable/disable the async dma engines
2896 *
2897 * @rdev: radeon_device pointer
2898 * @enable: enable/disable the DMA MEs.
2899 *
2900 * Halt or unhalt the async dma engines (CIK).
2901 */
2902static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2903{
2904 u32 me_cntl, reg_offset;
2905 int i;
2906
2907 for (i = 0; i < 2; i++) {
2908 if (i == 0)
2909 reg_offset = SDMA0_REGISTER_OFFSET;
2910 else
2911 reg_offset = SDMA1_REGISTER_OFFSET;
2912 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2913 if (enable)
2914 me_cntl &= ~SDMA_HALT;
2915 else
2916 me_cntl |= SDMA_HALT;
2917 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2918 }
2919}
2920
2921/**
2922 * cik_sdma_gfx_resume - setup and start the async dma engines
2923 *
2924 * @rdev: radeon_device pointer
2925 *
2926 * Set up the gfx DMA ring buffers and enable them (CIK).
2927 * Returns 0 for success, error for failure.
2928 */
2929static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2930{
2931 struct radeon_ring *ring;
2932 u32 rb_cntl, ib_cntl;
2933 u32 rb_bufsz;
2934 u32 reg_offset, wb_offset;
2935 int i, r;
2936
2937 for (i = 0; i < 2; i++) {
2938 if (i == 0) {
2939 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2940 reg_offset = SDMA0_REGISTER_OFFSET;
2941 wb_offset = R600_WB_DMA_RPTR_OFFSET;
2942 } else {
2943 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2944 reg_offset = SDMA1_REGISTER_OFFSET;
2945 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2946 }
2947
2948 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2949 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2950
2951 /* Set ring buffer size in dwords */
2952 rb_bufsz = drm_order(ring->ring_size / 4);
2953 rb_cntl = rb_bufsz << 1;
2954#ifdef __BIG_ENDIAN
2955 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2956#endif
2957 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2958
2959 /* Initialize the ring buffer's read and write pointers */
2960 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2961 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2962
2963 /* set the wb address whether it's enabled or not */
2964 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2965 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2966 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2967 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2968
2969 if (rdev->wb.enabled)
2970 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2971
2972 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2973 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2974
2975 ring->wptr = 0;
2976 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2977
2978 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2979
2980 /* enable DMA RB */
2981 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2982
2983 ib_cntl = SDMA_IB_ENABLE;
2984#ifdef __BIG_ENDIAN
2985 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2986#endif
2987 /* enable DMA IBs */
2988 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2989
2990 ring->ready = true;
2991
2992 r = radeon_ring_test(rdev, ring->idx, ring);
2993 if (r) {
2994 ring->ready = false;
2995 return r;
2996 }
2997 }
2998
2999 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3000
3001 return 0;
3002}
3003
3004/**
3005 * cik_sdma_rlc_resume - setup and start the async dma engines
3006 *
3007 * @rdev: radeon_device pointer
3008 *
3009 * Set up the compute DMA queues and enable them (CIK).
3010 * Returns 0 for success, error for failure.
3011 */
3012static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3013{
3014 /* XXX todo */
3015 return 0;
3016}
3017
3018/**
3019 * cik_sdma_load_microcode - load the sDMA ME ucode
3020 *
3021 * @rdev: radeon_device pointer
3022 *
3023 * Loads the sDMA0/1 ucode.
3024 * Returns 0 for success, -EINVAL if the ucode is not available.
3025 */
3026static int cik_sdma_load_microcode(struct radeon_device *rdev)
3027{
3028 const __be32 *fw_data;
3029 int i;
3030
3031 if (!rdev->sdma_fw)
3032 return -EINVAL;
3033
3034 /* stop the gfx rings and rlc compute queues */
3035 cik_sdma_gfx_stop(rdev);
3036 cik_sdma_rlc_stop(rdev);
3037
3038 /* halt the MEs */
3039 cik_sdma_enable(rdev, false);
3040
3041 /* sdma0 */
3042 fw_data = (const __be32 *)rdev->sdma_fw->data;
3043 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3044 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3045 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3046 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3047
3048 /* sdma1 */
3049 fw_data = (const __be32 *)rdev->sdma_fw->data;
3050 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3051 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3052 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3053 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3054
3055 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3056 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3057 return 0;
3058}
3059
3060/**
3061 * cik_sdma_resume - setup and start the async dma engines
3062 *
3063 * @rdev: radeon_device pointer
3064 *
3065 * Set up the DMA engines and enable them (CIK).
3066 * Returns 0 for success, error for failure.
3067 */
3068static int cik_sdma_resume(struct radeon_device *rdev)
3069{
3070 int r;
3071
3072 /* Reset dma */
3073 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3074 RREG32(SRBM_SOFT_RESET);
3075 udelay(50);
3076 WREG32(SRBM_SOFT_RESET, 0);
3077 RREG32(SRBM_SOFT_RESET);
3078
3079 r = cik_sdma_load_microcode(rdev);
3080 if (r)
3081 return r;
3082
3083 /* unhalt the MEs */
3084 cik_sdma_enable(rdev, true);
3085
3086 /* start the gfx rings and rlc compute queues */
3087 r = cik_sdma_gfx_resume(rdev);
3088 if (r)
3089 return r;
3090 r = cik_sdma_rlc_resume(rdev);
3091 if (r)
3092 return r;
3093
3094 return 0;
3095}
3096
3097/**
3098 * cik_sdma_fini - tear down the async dma engines
3099 *
3100 * @rdev: radeon_device pointer
3101 *
3102 * Stop the async dma engines and free the rings (CIK).
3103 */
3104static void cik_sdma_fini(struct radeon_device *rdev)
3105{
3106 /* stop the gfx rings and rlc compute queues */
3107 cik_sdma_gfx_stop(rdev);
3108 cik_sdma_rlc_stop(rdev);
3109 /* halt the MEs */
3110 cik_sdma_enable(rdev, false);
3111 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3112 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3113 /* XXX - compute dma queue tear down */
3114}
3115
3116/**
3117 * cik_copy_dma - copy pages using the DMA engine
3118 *
3119 * @rdev: radeon_device pointer
3120 * @src_offset: src GPU address
3121 * @dst_offset: dst GPU address
3122 * @num_gpu_pages: number of GPU pages to xfer
3123 * @fence: radeon fence object
3124 *
 3125 * Copy GPU pages using the DMA engine (CIK).
3126 * Used by the radeon ttm implementation to move pages if
3127 * registered as the asic copy callback.
3128 */
3129int cik_copy_dma(struct radeon_device *rdev,
3130 uint64_t src_offset, uint64_t dst_offset,
3131 unsigned num_gpu_pages,
3132 struct radeon_fence **fence)
3133{
3134 struct radeon_semaphore *sem = NULL;
3135 int ring_index = rdev->asic->copy.dma_ring_index;
3136 struct radeon_ring *ring = &rdev->ring[ring_index];
3137 u32 size_in_bytes, cur_size_in_bytes;
3138 int i, num_loops;
3139 int r = 0;
3140
3141 r = radeon_semaphore_create(rdev, &sem);
3142 if (r) {
3143 DRM_ERROR("radeon: moving bo (%d).\n", r);
3144 return r;
3145 }
3146
3147 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3148 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
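	/* e.g. (illustration, assuming 4 KB GPU pages): copying 1024 pages =
	 * 4 MB gives num_loops = DIV_ROUND_UP(0x400000, 0x1fffff) = 3, so
	 * 3 * 7 + 14 = 35 ring dwords are reserved below */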
3149 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3150 if (r) {
3151 DRM_ERROR("radeon: moving bo (%d).\n", r);
3152 radeon_semaphore_free(rdev, &sem, NULL);
3153 return r;
3154 }
3155
3156 if (radeon_fence_need_sync(*fence, ring->idx)) {
3157 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3158 ring->idx);
3159 radeon_fence_note_sync(*fence, ring->idx);
3160 } else {
3161 radeon_semaphore_free(rdev, &sem, NULL);
3162 }
3163
3164 for (i = 0; i < num_loops; i++) {
3165 cur_size_in_bytes = size_in_bytes;
3166 if (cur_size_in_bytes > 0x1fffff)
3167 cur_size_in_bytes = 0x1fffff;
3168 size_in_bytes -= cur_size_in_bytes;
3169 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3170 radeon_ring_write(ring, cur_size_in_bytes);
3171 radeon_ring_write(ring, 0); /* src/dst endian swap */
3172 radeon_ring_write(ring, src_offset & 0xffffffff);
3173 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3174 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3175 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3176 src_offset += cur_size_in_bytes;
3177 dst_offset += cur_size_in_bytes;
3178 }
3179
3180 r = radeon_fence_emit(rdev, fence, ring->idx);
3181 if (r) {
3182 radeon_ring_unlock_undo(rdev, ring);
3183 return r;
3184 }
3185
3186 radeon_ring_unlock_commit(rdev, ring);
3187 radeon_semaphore_free(rdev, &sem, *fence);
3188
3189 return r;
3190}
3191
3192/**
3193 * cik_sdma_ring_test - simple async dma engine test
3194 *
3195 * @rdev: radeon_device pointer
3196 * @ring: radeon_ring structure holding ring information
3197 *
 3198 * Test the DMA engine by using it to write a
 3199 * value to memory (CIK).
3200 * Returns 0 for success, error for failure.
3201 */
3202int cik_sdma_ring_test(struct radeon_device *rdev,
3203 struct radeon_ring *ring)
3204{
3205 unsigned i;
3206 int r;
3207 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3208 u32 tmp;
3209
3210 if (!ptr) {
3211 DRM_ERROR("invalid vram scratch pointer\n");
3212 return -EINVAL;
3213 }
3214
3215 tmp = 0xCAFEDEAD;
3216 writel(tmp, ptr);
3217
3218 r = radeon_ring_lock(rdev, ring, 4);
3219 if (r) {
3220 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3221 return r;
3222 }
3223 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3224 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3225 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3226 radeon_ring_write(ring, 1); /* number of DWs to follow */
3227 radeon_ring_write(ring, 0xDEADBEEF);
3228 radeon_ring_unlock_commit(rdev, ring);
3229
3230 for (i = 0; i < rdev->usec_timeout; i++) {
3231 tmp = readl(ptr);
3232 if (tmp == 0xDEADBEEF)
3233 break;
3234 DRM_UDELAY(1);
3235 }
3236
3237 if (i < rdev->usec_timeout) {
3238 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3239 } else {
3240 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3241 ring->idx, tmp);
3242 r = -EINVAL;
3243 }
3244 return r;
3245}
3246
3247/**
3248 * cik_sdma_ib_test - test an IB on the DMA engine
3249 *
3250 * @rdev: radeon_device pointer
3251 * @ring: radeon_ring structure holding ring information
3252 *
3253 * Test a simple IB in the DMA ring (CIK).
3254 * Returns 0 on success, error on failure.
3255 */
3256int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3257{
3258 struct radeon_ib ib;
3259 unsigned i;
3260 int r;
3261 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3262 u32 tmp = 0;
3263
3264 if (!ptr) {
3265 DRM_ERROR("invalid vram scratch pointer\n");
3266 return -EINVAL;
3267 }
3268
3269 tmp = 0xCAFEDEAD;
3270 writel(tmp, ptr);
3271
3272 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3273 if (r) {
3274 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3275 return r;
3276 }
3277
3278 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3279 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3280 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3281 ib.ptr[3] = 1;
3282 ib.ptr[4] = 0xDEADBEEF;
3283 ib.length_dw = 5;
3284
3285 r = radeon_ib_schedule(rdev, &ib, NULL);
3286 if (r) {
3287 radeon_ib_free(rdev, &ib);
3288 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3289 return r;
3290 }
3291 r = radeon_fence_wait(ib.fence, false);
3292 if (r) {
3293 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_ib_free(rdev, &ib);
 3294 		return r;
3295 }
3296 for (i = 0; i < rdev->usec_timeout; i++) {
3297 tmp = readl(ptr);
3298 if (tmp == 0xDEADBEEF)
3299 break;
3300 DRM_UDELAY(1);
3301 }
3302 if (i < rdev->usec_timeout) {
3303 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3304 } else {
3305 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3306 r = -EINVAL;
3307 }
3308 radeon_ib_free(rdev, &ib);
3309 return r;
3310}
3311
Alex Deuchercc066712013-04-09 12:59:51 -04003312
3313static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3314{
3315 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3316 RREG32(GRBM_STATUS));
3317 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3318 RREG32(GRBM_STATUS2));
3319 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3320 RREG32(GRBM_STATUS_SE0));
3321 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3322 RREG32(GRBM_STATUS_SE1));
3323 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3324 RREG32(GRBM_STATUS_SE2));
3325 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3326 RREG32(GRBM_STATUS_SE3));
3327 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3328 RREG32(SRBM_STATUS));
3329 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3330 RREG32(SRBM_STATUS2));
3331 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3332 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3333 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3334 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
Alex Deucher963e81f2013-06-26 17:37:11 -04003335 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3336 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3337 RREG32(CP_STALLED_STAT1));
3338 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3339 RREG32(CP_STALLED_STAT2));
3340 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3341 RREG32(CP_STALLED_STAT3));
3342 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3343 RREG32(CP_CPF_BUSY_STAT));
3344 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3345 RREG32(CP_CPF_STALLED_STAT1));
3346 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3347 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3348 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3349 RREG32(CP_CPC_STALLED_STAT1));
3350 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
Alex Deuchercc066712013-04-09 12:59:51 -04003351}
3352
Alex Deucher6f2043c2013-04-09 12:43:41 -04003353/**
Alex Deuchercc066712013-04-09 12:59:51 -04003354 * cik_gpu_check_soft_reset - check which blocks are busy
3355 *
3356 * @rdev: radeon_device pointer
3357 *
3358 * Check which blocks are busy and return the relevant reset
3359 * mask to be used by cik_gpu_soft_reset().
3360 * Returns a mask of the blocks to be reset.
3361 */
3362static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3363{
3364 u32 reset_mask = 0;
3365 u32 tmp;
3366
3367 /* GRBM_STATUS */
3368 tmp = RREG32(GRBM_STATUS);
3369 if (tmp & (PA_BUSY | SC_BUSY |
3370 BCI_BUSY | SX_BUSY |
3371 TA_BUSY | VGT_BUSY |
3372 DB_BUSY | CB_BUSY |
3373 GDS_BUSY | SPI_BUSY |
3374 IA_BUSY | IA_BUSY_NO_DMA))
3375 reset_mask |= RADEON_RESET_GFX;
3376
3377 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3378 reset_mask |= RADEON_RESET_CP;
3379
3380 /* GRBM_STATUS2 */
3381 tmp = RREG32(GRBM_STATUS2);
3382 if (tmp & RLC_BUSY)
3383 reset_mask |= RADEON_RESET_RLC;
3384
3385 /* SDMA0_STATUS_REG */
3386 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3387 if (!(tmp & SDMA_IDLE))
3388 reset_mask |= RADEON_RESET_DMA;
3389
3390 /* SDMA1_STATUS_REG */
3391 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3392 if (!(tmp & SDMA_IDLE))
3393 reset_mask |= RADEON_RESET_DMA1;
3394
3395 /* SRBM_STATUS2 */
3396 tmp = RREG32(SRBM_STATUS2);
3397 if (tmp & SDMA_BUSY)
3398 reset_mask |= RADEON_RESET_DMA;
3399
3400 if (tmp & SDMA1_BUSY)
3401 reset_mask |= RADEON_RESET_DMA1;
3402
3403 /* SRBM_STATUS */
3404 tmp = RREG32(SRBM_STATUS);
3405
3406 if (tmp & IH_BUSY)
3407 reset_mask |= RADEON_RESET_IH;
3408
3409 if (tmp & SEM_BUSY)
3410 reset_mask |= RADEON_RESET_SEM;
3411
3412 if (tmp & GRBM_RQ_PENDING)
3413 reset_mask |= RADEON_RESET_GRBM;
3414
3415 if (tmp & VMC_BUSY)
3416 reset_mask |= RADEON_RESET_VMC;
3417
3418 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3419 MCC_BUSY | MCD_BUSY))
3420 reset_mask |= RADEON_RESET_MC;
3421
3422 if (evergreen_is_display_hung(rdev))
3423 reset_mask |= RADEON_RESET_DISPLAY;
3424
 3425 /* Skip MC reset as it's most likely not hung, just busy */
3426 if (reset_mask & RADEON_RESET_MC) {
3427 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3428 reset_mask &= ~RADEON_RESET_MC;
3429 }
3430
3431 return reset_mask;
3432}
3433
3434/**
3435 * cik_gpu_soft_reset - soft reset GPU
3436 *
3437 * @rdev: radeon_device pointer
3438 * @reset_mask: mask of which blocks to reset
3439 *
3440 * Soft reset the blocks specified in @reset_mask.
3441 */
3442static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3443{
3444 struct evergreen_mc_save save;
3445 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3446 u32 tmp;
3447
3448 if (reset_mask == 0)
3449 return;
3450
3451 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3452
3453 cik_print_gpu_status_regs(rdev);
3454 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3455 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3456 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3457 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3458
3459 /* stop the rlc */
3460 cik_rlc_stop(rdev);
3461
3462 /* Disable GFX parsing/prefetching */
3463 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3464
3465 /* Disable MEC parsing/prefetching */
3466 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3467
3468 if (reset_mask & RADEON_RESET_DMA) {
3469 /* sdma0 */
3470 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3471 tmp |= SDMA_HALT;
3472 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3473 }
3474 if (reset_mask & RADEON_RESET_DMA1) {
3475 /* sdma1 */
3476 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3477 tmp |= SDMA_HALT;
3478 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3479 }
3480
3481 evergreen_mc_stop(rdev, &save);
3482 if (evergreen_mc_wait_for_idle(rdev)) {
3483		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3484 }
3485
3486 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3487 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3488
3489 if (reset_mask & RADEON_RESET_CP) {
3490 grbm_soft_reset |= SOFT_RESET_CP;
3491
3492 srbm_soft_reset |= SOFT_RESET_GRBM;
3493 }
3494
3495 if (reset_mask & RADEON_RESET_DMA)
3496 srbm_soft_reset |= SOFT_RESET_SDMA;
3497
3498 if (reset_mask & RADEON_RESET_DMA1)
3499 srbm_soft_reset |= SOFT_RESET_SDMA1;
3500
3501 if (reset_mask & RADEON_RESET_DISPLAY)
3502 srbm_soft_reset |= SOFT_RESET_DC;
3503
3504 if (reset_mask & RADEON_RESET_RLC)
3505 grbm_soft_reset |= SOFT_RESET_RLC;
3506
3507 if (reset_mask & RADEON_RESET_SEM)
3508 srbm_soft_reset |= SOFT_RESET_SEM;
3509
3510 if (reset_mask & RADEON_RESET_IH)
3511 srbm_soft_reset |= SOFT_RESET_IH;
3512
3513 if (reset_mask & RADEON_RESET_GRBM)
3514 srbm_soft_reset |= SOFT_RESET_GRBM;
3515
3516 if (reset_mask & RADEON_RESET_VMC)
3517 srbm_soft_reset |= SOFT_RESET_VMC;
3518
3519 if (!(rdev->flags & RADEON_IS_IGP)) {
3520 if (reset_mask & RADEON_RESET_MC)
3521 srbm_soft_reset |= SOFT_RESET_MC;
3522 }
3523
3524 if (grbm_soft_reset) {
3525 tmp = RREG32(GRBM_SOFT_RESET);
3526 tmp |= grbm_soft_reset;
3527 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3528 WREG32(GRBM_SOFT_RESET, tmp);
3529 tmp = RREG32(GRBM_SOFT_RESET);
3530
3531 udelay(50);
3532
3533 tmp &= ~grbm_soft_reset;
3534 WREG32(GRBM_SOFT_RESET, tmp);
3535 tmp = RREG32(GRBM_SOFT_RESET);
3536 }
3537
3538 if (srbm_soft_reset) {
3539 tmp = RREG32(SRBM_SOFT_RESET);
3540 tmp |= srbm_soft_reset;
3541 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3542 WREG32(SRBM_SOFT_RESET, tmp);
3543 tmp = RREG32(SRBM_SOFT_RESET);
3544
3545 udelay(50);
3546
3547 tmp &= ~srbm_soft_reset;
3548 WREG32(SRBM_SOFT_RESET, tmp);
3549 tmp = RREG32(SRBM_SOFT_RESET);
3550 }
3551
3552 /* Wait a little for things to settle down */
3553 udelay(50);
3554
3555 evergreen_mc_resume(rdev, &save);
3556 udelay(50);
3557
3558 cik_print_gpu_status_regs(rdev);
3559}
3560
3561/**
3562 * cik_asic_reset - soft reset GPU
3563 *
3564 * @rdev: radeon_device pointer
3565 *
3566 * Look up which blocks are hung and attempt
3567 * to reset them.
3568 * Returns 0 for success.
3569 */
3570int cik_asic_reset(struct radeon_device *rdev)
3571{
3572 u32 reset_mask;
3573
3574 reset_mask = cik_gpu_check_soft_reset(rdev);
3575
3576 if (reset_mask)
3577 r600_set_bios_scratch_engine_hung(rdev, true);
3578
3579 cik_gpu_soft_reset(rdev, reset_mask);
3580
3581 reset_mask = cik_gpu_check_soft_reset(rdev);
3582
3583 if (!reset_mask)
3584 r600_set_bios_scratch_engine_hung(rdev, false);
3585
3586 return 0;
3587}
3588
3589/**
3590 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04003591 *
3592 * @rdev: radeon_device pointer
3593 * @ring: radeon_ring structure holding ring information
3594 *
3595 * Check if the 3D engine is locked up (CIK).
3596 * Returns true if the engine is locked, false if not.
3597 */
Alex Deuchercc066712013-04-09 12:59:51 -04003598bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04003599{
Alex Deuchercc066712013-04-09 12:59:51 -04003600 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04003601
Alex Deuchercc066712013-04-09 12:59:51 -04003602 if (!(reset_mask & (RADEON_RESET_GFX |
3603 RADEON_RESET_COMPUTE |
3604 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04003605 radeon_ring_lockup_update(ring);
3606 return false;
3607 }
3608 /* force CP activities */
3609 radeon_ring_force_activity(rdev, ring);
3610 return radeon_ring_test_lockup(rdev, ring);
3611}
3612
3613/**
Alex Deucher21a93e12013-04-09 12:47:11 -04003614 * cik_sdma_is_lockup - Check if the DMA engine is locked up
3615 *
3616 * @rdev: radeon_device pointer
3617 * @ring: radeon_ring structure holding ring information
3618 *
3619 * Check if the async DMA engine is locked up (CIK).
3620 * Returns true if the engine appears to be locked up, false if not.
3621 */
3622bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3623{
Alex Deuchercc066712013-04-09 12:59:51 -04003624 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3625 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04003626
3627 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04003628 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04003629 else
Alex Deuchercc066712013-04-09 12:59:51 -04003630 mask = RADEON_RESET_DMA1;
3631
3632 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04003633 radeon_ring_lockup_update(ring);
3634 return false;
3635 }
3636 /* force ring activities */
3637 radeon_ring_force_activity(rdev, ring);
3638 return radeon_ring_test_lockup(rdev, ring);
3639}
3640
Alex Deucher1c491652013-04-09 12:45:26 -04003641/* MC */
3642/**
3643 * cik_mc_program - program the GPU memory controller
3644 *
3645 * @rdev: radeon_device pointer
3646 *
3647 * Set the location of vram, gart, and AGP in the GPU's
3648 * physical address space (CIK).
3649 */
3650static void cik_mc_program(struct radeon_device *rdev)
3651{
3652 struct evergreen_mc_save save;
3653 u32 tmp;
3654 int i, j;
3655
3656 /* Initialize HDP */
3657 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3658 WREG32((0x2c14 + j), 0x00000000);
3659 WREG32((0x2c18 + j), 0x00000000);
3660 WREG32((0x2c1c + j), 0x00000000);
3661 WREG32((0x2c20 + j), 0x00000000);
3662 WREG32((0x2c24 + j), 0x00000000);
3663 }
3664 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3665
3666 evergreen_mc_stop(rdev, &save);
3667 if (radeon_mc_wait_for_idle(rdev)) {
3668		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3669 }
3670 /* Lockout access through VGA aperture*/
3671 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3672 /* Update configuration */
3673 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3674 rdev->mc.vram_start >> 12);
3675 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3676 rdev->mc.vram_end >> 12);
3677 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3678 rdev->vram_scratch.gpu_addr >> 12);
3679 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3680 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3681 WREG32(MC_VM_FB_LOCATION, tmp);
3682 /* XXX double check these! */
3683 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3684 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3685 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3686 WREG32(MC_VM_AGP_BASE, 0);
3687 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3688 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3689 if (radeon_mc_wait_for_idle(rdev)) {
3690		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3691 }
3692 evergreen_mc_resume(rdev, &save);
3693 /* we need to own VRAM, so turn off the VGA renderer here
3694 * to stop it overwriting our objects */
3695 rv515_vga_render_disable(rdev);
3696}
3697
3698/**
3699 * cik_mc_init - initialize the memory controller driver params
3700 *
3701 * @rdev: radeon_device pointer
3702 *
3703 * Look up the amount of vram, vram width, and decide how to place
3704 * vram and gart within the GPU's physical address space (CIK).
3705 * Returns 0 for success.
3706 */
3707static int cik_mc_init(struct radeon_device *rdev)
3708{
3709 u32 tmp;
3710 int chansize, numchan;
3711
3712	/* Get VRAM information */
3713 rdev->mc.vram_is_ddr = true;
3714 tmp = RREG32(MC_ARB_RAMCFG);
3715 if (tmp & CHANSIZE_MASK) {
3716 chansize = 64;
3717 } else {
3718 chansize = 32;
3719 }
3720 tmp = RREG32(MC_SHARED_CHMAP);
3721 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3722 case 0:
3723 default:
3724 numchan = 1;
3725 break;
3726 case 1:
3727 numchan = 2;
3728 break;
3729 case 2:
3730 numchan = 4;
3731 break;
3732 case 3:
3733 numchan = 8;
3734 break;
3735 case 4:
3736 numchan = 3;
3737 break;
3738 case 5:
3739 numchan = 6;
3740 break;
3741 case 6:
3742 numchan = 10;
3743 break;
3744 case 7:
3745 numchan = 12;
3746 break;
3747 case 8:
3748 numchan = 16;
3749 break;
3750 }
3751 rdev->mc.vram_width = numchan * chansize;
3752	/* Could aperture size report 0? */
3753 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3754 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3755 /* size in MB on si */
3756 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3757 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3758 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3759 si_vram_gtt_location(rdev, &rdev->mc);
3760 radeon_update_bandwidth_info(rdev);
3761
3762 return 0;
3763}
3764
3765/*
3766 * GART
3767 * VMID 0 is the physical GPU addresses as used by the kernel.
3768 * VMIDs 1-15 are used for userspace clients and are handled
3769 * by the radeon vm/hsa code.
3770 */
3771/**
3772 * cik_pcie_gart_tlb_flush - gart tlb flush callback
3773 *
3774 * @rdev: radeon_device pointer
3775 *
3776 * Flush the TLB for the VMID 0 page table (CIK).
3777 */
3778void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3779{
3780 /* flush hdp cache */
3781 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3782
3783 /* bits 0-15 are the VM contexts0-15 */
3784 WREG32(VM_INVALIDATE_REQUEST, 0x1);
3785}
3786
3787/**
3788 * cik_pcie_gart_enable - gart enable
3789 *
3790 * @rdev: radeon_device pointer
3791 *
3792 * This sets up the TLBs, programs the page tables for VMID0,
3793 * sets up the hw for VMIDs 1-15 which are allocated on
3794 * demand, and sets up the global locations for the LDS, GDS,
3795 * and GPUVM for FSA64 clients (CIK).
3796 * Returns 0 for success, errors for failure.
3797 */
3798static int cik_pcie_gart_enable(struct radeon_device *rdev)
3799{
3800 int r, i;
3801
3802 if (rdev->gart.robj == NULL) {
3803 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3804 return -EINVAL;
3805 }
3806 r = radeon_gart_table_vram_pin(rdev);
3807 if (r)
3808 return r;
3809 radeon_gart_restore(rdev);
3810 /* Setup TLB control */
3811 WREG32(MC_VM_MX_L1_TLB_CNTL,
3812 (0xA << 7) |
3813 ENABLE_L1_TLB |
3814 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3815 ENABLE_ADVANCED_DRIVER_MODEL |
3816 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3817 /* Setup L2 cache */
3818 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3819 ENABLE_L2_FRAGMENT_PROCESSING |
3820 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3821 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3822 EFFECTIVE_L2_QUEUE_SIZE(7) |
3823 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3824 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3825 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3826 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3827 /* setup context0 */
3828 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3829 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3830 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3831 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3832 (u32)(rdev->dummy_page.addr >> 12));
3833 WREG32(VM_CONTEXT0_CNTL2, 0);
3834 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3835 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3836
3837 WREG32(0x15D4, 0);
3838 WREG32(0x15D8, 0);
3839 WREG32(0x15DC, 0);
3840
3841 /* empty context1-15 */
3842 /* FIXME start with 4G, once using 2 level pt switch to full
3843 * vm size space
3844 */
3845 /* set vm size, must be a multiple of 4 */
3846 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3847 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3848 for (i = 1; i < 16; i++) {
3849 if (i < 8)
3850 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3851 rdev->gart.table_addr >> 12);
3852 else
3853 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3854 rdev->gart.table_addr >> 12);
3855 }
3856
3857 /* enable context1-15 */
3858 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3859 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04003860 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04003861 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04003862 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3863 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3864 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3865 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3866 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3867 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3868 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3869 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3870 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3871 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3872 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3873 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04003874
3875 /* TC cache setup ??? */
3876 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3877 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3878 WREG32(TC_CFG_L1_STORE_POLICY, 0);
3879
3880 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3881 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3882 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3883 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3884 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3885
3886 WREG32(TC_CFG_L1_VOLATILE, 0);
3887 WREG32(TC_CFG_L2_VOLATILE, 0);
3888
3889 if (rdev->family == CHIP_KAVERI) {
3890 u32 tmp = RREG32(CHUB_CONTROL);
3891 tmp &= ~BYPASS_VM;
3892 WREG32(CHUB_CONTROL, tmp);
3893 }
3894
3895 /* XXX SH_MEM regs */
3896 /* where to put LDS, scratch, GPUVM in FSA64 space */
3897 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05003898 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04003899 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04003900 WREG32(SH_MEM_CONFIG, 0);
3901 WREG32(SH_MEM_APE1_BASE, 1);
3902 WREG32(SH_MEM_APE1_LIMIT, 0);
3903 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04003904 /* SDMA GFX */
3905 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3906 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3907 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3908 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3909 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04003910 }
Alex Deucherb556b122013-01-29 10:44:22 -05003911 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucher1c491652013-04-09 12:45:26 -04003912
3913 cik_pcie_gart_tlb_flush(rdev);
3914 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3915 (unsigned)(rdev->mc.gtt_size >> 20),
3916 (unsigned long long)rdev->gart.table_addr);
3917 rdev->gart.ready = true;
3918 return 0;
3919}
3920
3921/**
3922 * cik_pcie_gart_disable - gart disable
3923 *
3924 * @rdev: radeon_device pointer
3925 *
3926 * This disables all VM page tables (CIK).
3927 */
3928static void cik_pcie_gart_disable(struct radeon_device *rdev)
3929{
3930 /* Disable all tables */
3931 WREG32(VM_CONTEXT0_CNTL, 0);
3932 WREG32(VM_CONTEXT1_CNTL, 0);
3933 /* Setup TLB control */
3934 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3935 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3936 /* Setup L2 cache */
3937 WREG32(VM_L2_CNTL,
3938 ENABLE_L2_FRAGMENT_PROCESSING |
3939 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3940 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3941 EFFECTIVE_L2_QUEUE_SIZE(7) |
3942 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3943 WREG32(VM_L2_CNTL2, 0);
3944 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3945 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3946 radeon_gart_table_vram_unpin(rdev);
3947}
3948
3949/**
3950 * cik_pcie_gart_fini - vm fini callback
3951 *
3952 * @rdev: radeon_device pointer
3953 *
3954 * Tears down the driver GART/VM setup (CIK).
3955 */
3956static void cik_pcie_gart_fini(struct radeon_device *rdev)
3957{
3958 cik_pcie_gart_disable(rdev);
3959 radeon_gart_table_vram_free(rdev);
3960 radeon_gart_fini(rdev);
3961}
3962
3963/* vm parser */
3964/**
3965 * cik_ib_parse - vm ib_parse callback
3966 *
3967 * @rdev: radeon_device pointer
3968 * @ib: indirect buffer pointer
3969 *
3970 * CIK uses hw IB checking so this is a nop (CIK).
3971 */
3972int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3973{
3974 return 0;
3975}
3976
3977/*
3978 * vm
3979 * VMID 0 is the physical GPU addresses as used by the kernel.
3980 * VMIDs 1-15 are used for userspace clients and are handled
3981 * by the radeon vm/hsa code.
3982 */
3983/**
3984 * cik_vm_init - cik vm init callback
3985 *
3986 * @rdev: radeon_device pointer
3987 *
3988 * Inits cik specific vm parameters (number of VMs, base of vram for
3989 * VMIDs 1-15) (CIK).
3990 * Returns 0 for success.
3991 */
3992int cik_vm_init(struct radeon_device *rdev)
3993{
3994 /* number of VMs */
3995 rdev->vm_manager.nvm = 16;
3996 /* base offset of vram pages */
3997 if (rdev->flags & RADEON_IS_IGP) {
3998 u64 tmp = RREG32(MC_VM_FB_OFFSET);
3999 tmp <<= 22;
4000 rdev->vm_manager.vram_base_offset = tmp;
4001 } else
4002 rdev->vm_manager.vram_base_offset = 0;
4003
4004 return 0;
4005}
4006
4007/**
4008 * cik_vm_fini - cik vm fini callback
4009 *
4010 * @rdev: radeon_device pointer
4011 *
4012 * Tear down any asic specific VM setup (CIK).
4013 */
4014void cik_vm_fini(struct radeon_device *rdev)
4015{
4016}
4017
Alex Deucherf96ab482012-08-31 10:37:47 -04004018/**
4019 * cik_vm_flush - cik vm flush using the CP
4020 *
4021 * @rdev: radeon_device pointer
4022 *
4023 * Update the page table base and flush the VM TLB
4024 * using the CP (CIK).
4025 */
4026void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4027{
4028 struct radeon_ring *ring = &rdev->ring[ridx];
4029
4030 if (vm == NULL)
4031 return;
4032
4033 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4034 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4035 WRITE_DATA_DST_SEL(0)));
4036 if (vm->id < 8) {
4037 radeon_ring_write(ring,
4038 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4039 } else {
4040 radeon_ring_write(ring,
4041 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4042 }
4043 radeon_ring_write(ring, 0);
4044 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4045
4046 /* update SH_MEM_* regs */
4047 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4048 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4049 WRITE_DATA_DST_SEL(0)));
4050 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4051 radeon_ring_write(ring, 0);
4052 radeon_ring_write(ring, VMID(vm->id));
4053
4054 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4055 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4056 WRITE_DATA_DST_SEL(0)));
4057 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4058 radeon_ring_write(ring, 0);
4059
4060 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4061 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4062 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4063 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4064
4065 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4066 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4067 WRITE_DATA_DST_SEL(0)));
4068 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4069 radeon_ring_write(ring, 0);
4070 radeon_ring_write(ring, VMID(0));
4071
4072 /* HDP flush */
4073 /* We should be using the WAIT_REG_MEM packet here like in
4074 * cik_fence_ring_emit(), but it causes the CP to hang in this
4075 * context...
4076 */
4077 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4078 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4079 WRITE_DATA_DST_SEL(0)));
4080 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4081 radeon_ring_write(ring, 0);
4082 radeon_ring_write(ring, 0);
4083
4084 /* bits 0-15 are the VM contexts0-15 */
4085 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4086 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4087 WRITE_DATA_DST_SEL(0)));
4088 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4089 radeon_ring_write(ring, 0);
4090 radeon_ring_write(ring, 1 << vm->id);
4091
Alex Deucherb07fdd32013-04-11 09:36:17 -04004092 /* compute doesn't have PFP */
4093 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4094 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4095 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4096 radeon_ring_write(ring, 0x0);
4097 }
Alex Deucherf96ab482012-08-31 10:37:47 -04004098}
4099
Alex Deucher605de6b2012-10-22 13:04:03 -04004100/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04004101 * cik_vm_set_page - update the page tables using CP or sDMA
4102 *
4103 * @rdev: radeon_device pointer
4104 * @ib: indirect buffer to fill with commands
4105 * @pe: addr of the page entry
4106 * @addr: dst addr to write into pe
4107 * @count: number of page entries to update
4108 * @incr: increase next addr by incr bytes
4109 * @flags: access flags
4110 *
4111 * Update the page tables using CP or sDMA (CIK).
4112 */
4113void cik_vm_set_page(struct radeon_device *rdev,
4114 struct radeon_ib *ib,
4115 uint64_t pe,
4116 uint64_t addr, unsigned count,
4117 uint32_t incr, uint32_t flags)
4118{
4119 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4120 uint64_t value;
4121 unsigned ndw;
4122
4123 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4124 /* CP */
4125 while (count) {
4126 ndw = 2 + count * 2;
4127 if (ndw > 0x3FFE)
4128 ndw = 0x3FFE;
4129
4130 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4131 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4132 WRITE_DATA_DST_SEL(1));
4133 ib->ptr[ib->length_dw++] = pe;
4134 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4135 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4136 if (flags & RADEON_VM_PAGE_SYSTEM) {
4137 value = radeon_vm_map_gart(rdev, addr);
4138 value &= 0xFFFFFFFFFFFFF000ULL;
4139 } else if (flags & RADEON_VM_PAGE_VALID) {
4140 value = addr;
4141 } else {
4142 value = 0;
4143 }
4144 addr += incr;
4145 value |= r600_flags;
4146 ib->ptr[ib->length_dw++] = value;
4147 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4148 }
4149 }
4150 } else {
4151 /* DMA */
4152 if (flags & RADEON_VM_PAGE_SYSTEM) {
4153 while (count) {
4154 ndw = count * 2;
4155 if (ndw > 0xFFFFE)
4156 ndw = 0xFFFFE;
4157
4158 /* for non-physically contiguous pages (system) */
4159 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4160 ib->ptr[ib->length_dw++] = pe;
4161 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4162 ib->ptr[ib->length_dw++] = ndw;
4163 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4164 if (flags & RADEON_VM_PAGE_SYSTEM) {
4165 value = radeon_vm_map_gart(rdev, addr);
4166 value &= 0xFFFFFFFFFFFFF000ULL;
4167 } else if (flags & RADEON_VM_PAGE_VALID) {
4168 value = addr;
4169 } else {
4170 value = 0;
4171 }
4172 addr += incr;
4173 value |= r600_flags;
4174 ib->ptr[ib->length_dw++] = value;
4175 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4176 }
4177 }
4178 } else {
4179 while (count) {
4180 ndw = count;
4181 if (ndw > 0x7FFFF)
4182 ndw = 0x7FFFF;
4183
4184 if (flags & RADEON_VM_PAGE_VALID)
4185 value = addr;
4186 else
4187 value = 0;
4188 /* for physically contiguous pages (vram) */
4189 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4190 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4191 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4192 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4193 ib->ptr[ib->length_dw++] = 0;
4194 ib->ptr[ib->length_dw++] = value; /* value */
4195 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4196 ib->ptr[ib->length_dw++] = incr; /* increment size */
4197 ib->ptr[ib->length_dw++] = 0;
4198 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4199 pe += ndw * 8;
4200 addr += ndw * incr;
4201 count -= ndw;
4202 }
4203 }
4204 while (ib->length_dw & 0x7)
4205 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4206 }
4207}
4208
4209/**
Alex Deucher605de6b2012-10-22 13:04:03 -04004210 * cik_dma_vm_flush - cik vm flush using sDMA
4211 *
4212 * @rdev: radeon_device pointer
4213 *
4214 * Update the page table base and flush the VM TLB
4215 * using sDMA (CIK).
4216 */
4217void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4218{
4219 struct radeon_ring *ring = &rdev->ring[ridx];
4220 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4221 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4222 u32 ref_and_mask;
4223
4224 if (vm == NULL)
4225 return;
4226
4227 if (ridx == R600_RING_TYPE_DMA_INDEX)
4228 ref_and_mask = SDMA0;
4229 else
4230 ref_and_mask = SDMA1;
4231
4232 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4233 if (vm->id < 8) {
4234 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4235 } else {
4236 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4237 }
4238 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4239
4240 /* update SH_MEM_* regs */
4241 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4242 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4243 radeon_ring_write(ring, VMID(vm->id));
4244
4245 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4246 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4247 radeon_ring_write(ring, 0);
4248
4249 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4250 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4251 radeon_ring_write(ring, 0);
4252
4253 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4254 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4255 radeon_ring_write(ring, 1);
4256
4257 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4258 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4259 radeon_ring_write(ring, 0);
4260
4261 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4262 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4263 radeon_ring_write(ring, VMID(0));
4264
4265 /* flush HDP */
4266 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4267 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4268 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4269 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4270 radeon_ring_write(ring, ref_and_mask); /* MASK */
4271 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4272
4273 /* flush TLB */
4274 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4275 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4276 radeon_ring_write(ring, 1 << vm->id);
4277}
4278
Alex Deucherf6796ca2012-11-09 10:44:08 -05004279/*
4280 * RLC
4281 * The RLC is a multi-purpose microengine that handles a
4282 * variety of functions, the most important of which is
4283 * the interrupt controller.
4284 */
4285/**
4286 * cik_rlc_stop - stop the RLC ME
4287 *
4288 * @rdev: radeon_device pointer
4289 *
4290 * Halt the RLC ME (MicroEngine) (CIK).
4291 */
4292static void cik_rlc_stop(struct radeon_device *rdev)
4293{
4294 int i, j, k;
4295 u32 mask, tmp;
4296
4297 tmp = RREG32(CP_INT_CNTL_RING0);
4298 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4299 WREG32(CP_INT_CNTL_RING0, tmp);
4300
4301 RREG32(CB_CGTT_SCLK_CTRL);
4302 RREG32(CB_CGTT_SCLK_CTRL);
4303 RREG32(CB_CGTT_SCLK_CTRL);
4304 RREG32(CB_CGTT_SCLK_CTRL);
4305
4306 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4307 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4308
4309 WREG32(RLC_CNTL, 0);
4310
4311 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4312 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4313 cik_select_se_sh(rdev, i, j);
4314 for (k = 0; k < rdev->usec_timeout; k++) {
4315 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4316 break;
4317 udelay(1);
4318 }
4319 }
4320 }
4321 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4322
4323 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4324 for (k = 0; k < rdev->usec_timeout; k++) {
4325 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4326 break;
4327 udelay(1);
4328 }
4329}
4330
4331/**
4332 * cik_rlc_start - start the RLC ME
4333 *
4334 * @rdev: radeon_device pointer
4335 *
4336 * Unhalt the RLC ME (MicroEngine) (CIK).
4337 */
4338static void cik_rlc_start(struct radeon_device *rdev)
4339{
4340 u32 tmp;
4341
4342 WREG32(RLC_CNTL, RLC_ENABLE);
4343
4344 tmp = RREG32(CP_INT_CNTL_RING0);
4345 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4346 WREG32(CP_INT_CNTL_RING0, tmp);
4347
4348 udelay(50);
4349}
4350
4351/**
4352 * cik_rlc_resume - setup the RLC hw
4353 *
4354 * @rdev: radeon_device pointer
4355 *
4356 * Initialize the RLC registers, load the ucode,
4357 * and start the RLC (CIK).
4358 * Returns 0 for success, -EINVAL if the ucode is not available.
4359 */
4360static int cik_rlc_resume(struct radeon_device *rdev)
4361{
4362 u32 i, size;
4363 u32 clear_state_info[3];
4364 const __be32 *fw_data;
4365
4366 if (!rdev->rlc_fw)
4367 return -EINVAL;
4368
4369 switch (rdev->family) {
4370 case CHIP_BONAIRE:
4371 default:
4372 size = BONAIRE_RLC_UCODE_SIZE;
4373 break;
4374 case CHIP_KAVERI:
4375 size = KV_RLC_UCODE_SIZE;
4376 break;
4377 case CHIP_KABINI:
4378 size = KB_RLC_UCODE_SIZE;
4379 break;
4380 }
4381
4382 cik_rlc_stop(rdev);
4383
4384 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4385 RREG32(GRBM_SOFT_RESET);
4386 udelay(50);
4387 WREG32(GRBM_SOFT_RESET, 0);
4388 RREG32(GRBM_SOFT_RESET);
4389 udelay(50);
4390
4391 WREG32(RLC_LB_CNTR_INIT, 0);
4392 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4393
4394 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4395 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4396 WREG32(RLC_LB_PARAMS, 0x00600408);
4397 WREG32(RLC_LB_CNTL, 0x80000004);
4398
4399 WREG32(RLC_MC_CNTL, 0);
4400 WREG32(RLC_UCODE_CNTL, 0);
4401
4402 fw_data = (const __be32 *)rdev->rlc_fw->data;
4403 WREG32(RLC_GPM_UCODE_ADDR, 0);
4404 for (i = 0; i < size; i++)
4405 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4406 WREG32(RLC_GPM_UCODE_ADDR, 0);
4407
4408 /* XXX */
4409 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4410 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4411 clear_state_info[2] = 0;//cik_default_size;
4412 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4413 for (i = 0; i < 3; i++)
4414 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4415 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4416
4417 cik_rlc_start(rdev);
4418
4419 return 0;
4420}
Alex Deuchera59781b2012-11-09 10:45:57 -05004421
4422/*
4423 * Interrupts
4424 * Starting with r6xx, interrupts are handled via a ring buffer.
4425 * Ring buffers are areas of GPU accessible memory that the GPU
4426 * writes interrupt vectors into and the host reads vectors out of.
4427 * There is a rptr (read pointer) that determines where the
4428 * host is currently reading, and a wptr (write pointer)
4429 * which determines where the GPU has written. When the
4430 * pointers are equal, the ring is idle. When the GPU
4431 * writes vectors to the ring buffer, it increments the
4432 * wptr. When there is an interrupt, the host then starts
4433 * fetching commands and processing them until the pointers are
4434 * equal again at which point it updates the rptr.
4435 */
4436
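/*
 * Illustrative sketch only (not part of the driver): a minimal consumer of
 * the rptr/wptr scheme described above, assuming a power-of-two ring size in
 * bytes and 16-byte vectors.  The function and parameter names here are
 * hypothetical; the real implementation is cik_irq_process() further below.
 */
#if 0
static void ih_ring_drain_example(const u32 *ring, u32 ptr_mask,
				  u32 *rptr, u32 wptr)
{
	while (*rptr != wptr) {
		u32 ring_index = *rptr / 4;		/* rptr/wptr are byte offsets */
		u32 src_id = ring[ring_index] & 0xff;	/* dispatch on the source id */

		(void)src_id;
		*rptr = (*rptr + 16) & ptr_mask;	/* advance one 128-bit vector */
	}
}
#endif
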
4437/**
4438 * cik_enable_interrupts - Enable the interrupt ring buffer
4439 *
4440 * @rdev: radeon_device pointer
4441 *
4442 * Enable the interrupt ring buffer (CIK).
4443 */
4444static void cik_enable_interrupts(struct radeon_device *rdev)
4445{
4446 u32 ih_cntl = RREG32(IH_CNTL);
4447 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4448
4449 ih_cntl |= ENABLE_INTR;
4450 ih_rb_cntl |= IH_RB_ENABLE;
4451 WREG32(IH_CNTL, ih_cntl);
4452 WREG32(IH_RB_CNTL, ih_rb_cntl);
4453 rdev->ih.enabled = true;
4454}
4455
4456/**
4457 * cik_disable_interrupts - Disable the interrupt ring buffer
4458 *
4459 * @rdev: radeon_device pointer
4460 *
4461 * Disable the interrupt ring buffer (CIK).
4462 */
4463static void cik_disable_interrupts(struct radeon_device *rdev)
4464{
4465 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4466 u32 ih_cntl = RREG32(IH_CNTL);
4467
4468 ih_rb_cntl &= ~IH_RB_ENABLE;
4469 ih_cntl &= ~ENABLE_INTR;
4470 WREG32(IH_RB_CNTL, ih_rb_cntl);
4471 WREG32(IH_CNTL, ih_cntl);
4472 /* set rptr, wptr to 0 */
4473 WREG32(IH_RB_RPTR, 0);
4474 WREG32(IH_RB_WPTR, 0);
4475 rdev->ih.enabled = false;
4476 rdev->ih.rptr = 0;
4477}
4478
4479/**
4480 * cik_disable_interrupt_state - Disable all interrupt sources
4481 *
4482 * @rdev: radeon_device pointer
4483 *
4484 * Clear all interrupt enable bits used by the driver (CIK).
4485 */
4486static void cik_disable_interrupt_state(struct radeon_device *rdev)
4487{
4488 u32 tmp;
4489
4490 /* gfx ring */
4491 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04004492 /* sdma */
4493 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4494 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4495 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4496 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05004497 /* compute queues */
4498 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4499 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4500 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4501 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4502 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4503 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4504 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4505 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4506 /* grbm */
4507 WREG32(GRBM_INT_CNTL, 0);
4508 /* vline/vblank, etc. */
4509 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4510 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4511 if (rdev->num_crtc >= 4) {
4512 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4513 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4514 }
4515 if (rdev->num_crtc >= 6) {
4516 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4517 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4518 }
4519
4520 /* dac hotplug */
4521 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4522
4523 /* digital hotplug */
4524 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4525 WREG32(DC_HPD1_INT_CONTROL, tmp);
4526 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4527 WREG32(DC_HPD2_INT_CONTROL, tmp);
4528 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4529 WREG32(DC_HPD3_INT_CONTROL, tmp);
4530 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4531 WREG32(DC_HPD4_INT_CONTROL, tmp);
4532 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4533 WREG32(DC_HPD5_INT_CONTROL, tmp);
4534 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4535 WREG32(DC_HPD6_INT_CONTROL, tmp);
4536
4537}
4538
4539/**
4540 * cik_irq_init - init and enable the interrupt ring
4541 *
4542 * @rdev: radeon_device pointer
4543 *
4544 * Allocate a ring buffer for the interrupt controller,
4545 * enable the RLC, disable interrupts, set up the IH
4546 * ring buffer, and enable it (CIK).
4547 * Called at device load and resume.
4548 * Returns 0 for success, errors for failure.
4549 */
4550static int cik_irq_init(struct radeon_device *rdev)
4551{
4552 int ret = 0;
4553 int rb_bufsz;
4554 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4555
4556 /* allocate ring */
4557 ret = r600_ih_ring_alloc(rdev);
4558 if (ret)
4559 return ret;
4560
4561 /* disable irqs */
4562 cik_disable_interrupts(rdev);
4563
4564 /* init rlc */
4565 ret = cik_rlc_resume(rdev);
4566 if (ret) {
4567 r600_ih_ring_fini(rdev);
4568 return ret;
4569 }
4570
4571 /* setup interrupt control */
4572 /* XXX this should actually be a bus address, not an MC address. same on older asics */
4573 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4574 interrupt_cntl = RREG32(INTERRUPT_CNTL);
4575 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4576 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4577 */
4578 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4579 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4580 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4581 WREG32(INTERRUPT_CNTL, interrupt_cntl);
4582
4583 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
4584 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4585
4586 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4587 IH_WPTR_OVERFLOW_CLEAR |
4588 (rb_bufsz << 1));
4589
4590 if (rdev->wb.enabled)
4591 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4592
4593 /* set the writeback address whether it's enabled or not */
4594 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4595 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4596
4597 WREG32(IH_RB_CNTL, ih_rb_cntl);
4598
4599 /* set rptr, wptr to 0 */
4600 WREG32(IH_RB_RPTR, 0);
4601 WREG32(IH_RB_WPTR, 0);
4602
4603 /* Default settings for IH_CNTL (disabled at first) */
4604 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4605 /* RPTR_REARM only works if msi's are enabled */
4606 if (rdev->msi_enabled)
4607 ih_cntl |= RPTR_REARM;
4608 WREG32(IH_CNTL, ih_cntl);
4609
4610 /* force the active interrupt state to all disabled */
4611 cik_disable_interrupt_state(rdev);
4612
4613 pci_set_master(rdev->pdev);
4614
4615 /* enable irqs */
4616 cik_enable_interrupts(rdev);
4617
4618 return ret;
4619}
4620
4621/**
4622 * cik_irq_set - enable/disable interrupt sources
4623 *
4624 * @rdev: radeon_device pointer
4625 *
4626 * Enable interrupt sources on the GPU (vblanks, hpd,
4627 * etc.) (CIK).
4628 * Returns 0 for success, errors for failure.
4629 */
4630int cik_irq_set(struct radeon_device *rdev)
4631{
4632 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
4633 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
4634 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4635 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4636 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04004637 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05004638
4639 if (!rdev->irq.installed) {
4640 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4641 return -EINVAL;
4642 }
4643 /* don't enable anything if the ih is disabled */
4644 if (!rdev->ih.enabled) {
4645 cik_disable_interrupts(rdev);
4646 /* force the active interrupt state to all disabled */
4647 cik_disable_interrupt_state(rdev);
4648 return 0;
4649 }
4650
4651 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4652 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4653 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4654 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4655 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4656 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4657
Alex Deucher21a93e12013-04-09 12:47:11 -04004658 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4659 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4660
Alex Deuchera59781b2012-11-09 10:45:57 -05004661 /* enable CP interrupts on all rings */
4662 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4663 DRM_DEBUG("cik_irq_set: sw int gfx\n");
4664 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4665 }
4666 /* TODO: compute queues! */
4667 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
4668
Alex Deucher21a93e12013-04-09 12:47:11 -04004669 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4670 DRM_DEBUG("cik_irq_set: sw int dma\n");
4671 dma_cntl |= TRAP_ENABLE;
4672 }
4673
4674 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4675 DRM_DEBUG("cik_irq_set: sw int dma1\n");
4676 dma_cntl1 |= TRAP_ENABLE;
4677 }
4678
Alex Deuchera59781b2012-11-09 10:45:57 -05004679 if (rdev->irq.crtc_vblank_int[0] ||
4680 atomic_read(&rdev->irq.pflip[0])) {
4681 DRM_DEBUG("cik_irq_set: vblank 0\n");
4682 crtc1 |= VBLANK_INTERRUPT_MASK;
4683 }
4684 if (rdev->irq.crtc_vblank_int[1] ||
4685 atomic_read(&rdev->irq.pflip[1])) {
4686 DRM_DEBUG("cik_irq_set: vblank 1\n");
4687 crtc2 |= VBLANK_INTERRUPT_MASK;
4688 }
4689 if (rdev->irq.crtc_vblank_int[2] ||
4690 atomic_read(&rdev->irq.pflip[2])) {
4691 DRM_DEBUG("cik_irq_set: vblank 2\n");
4692 crtc3 |= VBLANK_INTERRUPT_MASK;
4693 }
4694 if (rdev->irq.crtc_vblank_int[3] ||
4695 atomic_read(&rdev->irq.pflip[3])) {
4696 DRM_DEBUG("cik_irq_set: vblank 3\n");
4697 crtc4 |= VBLANK_INTERRUPT_MASK;
4698 }
4699 if (rdev->irq.crtc_vblank_int[4] ||
4700 atomic_read(&rdev->irq.pflip[4])) {
4701 DRM_DEBUG("cik_irq_set: vblank 4\n");
4702 crtc5 |= VBLANK_INTERRUPT_MASK;
4703 }
4704 if (rdev->irq.crtc_vblank_int[5] ||
4705 atomic_read(&rdev->irq.pflip[5])) {
4706 DRM_DEBUG("cik_irq_set: vblank 5\n");
4707 crtc6 |= VBLANK_INTERRUPT_MASK;
4708 }
4709 if (rdev->irq.hpd[0]) {
4710 DRM_DEBUG("cik_irq_set: hpd 1\n");
4711 hpd1 |= DC_HPDx_INT_EN;
4712 }
4713 if (rdev->irq.hpd[1]) {
4714 DRM_DEBUG("cik_irq_set: hpd 2\n");
4715 hpd2 |= DC_HPDx_INT_EN;
4716 }
4717 if (rdev->irq.hpd[2]) {
4718 DRM_DEBUG("cik_irq_set: hpd 3\n");
4719 hpd3 |= DC_HPDx_INT_EN;
4720 }
4721 if (rdev->irq.hpd[3]) {
4722 DRM_DEBUG("cik_irq_set: hpd 4\n");
4723 hpd4 |= DC_HPDx_INT_EN;
4724 }
4725 if (rdev->irq.hpd[4]) {
4726 DRM_DEBUG("cik_irq_set: hpd 5\n");
4727 hpd5 |= DC_HPDx_INT_EN;
4728 }
4729 if (rdev->irq.hpd[5]) {
4730 DRM_DEBUG("cik_irq_set: hpd 6\n");
4731 hpd6 |= DC_HPDx_INT_EN;
4732 }
4733
4734 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4735
Alex Deucher21a93e12013-04-09 12:47:11 -04004736 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4737 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4738
Alex Deuchera59781b2012-11-09 10:45:57 -05004739 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4740
4741 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4742 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4743 if (rdev->num_crtc >= 4) {
4744 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4745 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4746 }
4747 if (rdev->num_crtc >= 6) {
4748 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4749 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4750 }
4751
4752 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4753 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4754 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4755 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4756 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4757 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4758
4759 return 0;
4760}
4761
4762/**
4763 * cik_irq_ack - ack interrupt sources
4764 *
4765 * @rdev: radeon_device pointer
4766 *
4767 * Ack interrupt sources on the GPU (vblanks, hpd,
4768 * etc.) (CIK). Certain interrupt sources are sw
4769 * generated and do not require an explicit ack.
4770 */
4771static inline void cik_irq_ack(struct radeon_device *rdev)
4772{
4773 u32 tmp;
4774
4775 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4776 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4777 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4778 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4779 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4780 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4781 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4782
4783 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4784 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4785 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4786 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4787 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4788 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4789 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4790 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4791
4792 if (rdev->num_crtc >= 4) {
4793 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4794 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4795 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4796 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4797 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4798 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4799 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4800 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4801 }
4802
4803 if (rdev->num_crtc >= 6) {
4804 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4805 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4806 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4807 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4808 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4809 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4810 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4811 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4812 }
4813
4814 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4815 tmp = RREG32(DC_HPD1_INT_CONTROL);
4816 tmp |= DC_HPDx_INT_ACK;
4817 WREG32(DC_HPD1_INT_CONTROL, tmp);
4818 }
4819 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4820 tmp = RREG32(DC_HPD2_INT_CONTROL);
4821 tmp |= DC_HPDx_INT_ACK;
4822 WREG32(DC_HPD2_INT_CONTROL, tmp);
4823 }
4824 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4825 tmp = RREG32(DC_HPD3_INT_CONTROL);
4826 tmp |= DC_HPDx_INT_ACK;
4827 WREG32(DC_HPD3_INT_CONTROL, tmp);
4828 }
4829 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4830 tmp = RREG32(DC_HPD4_INT_CONTROL);
4831 tmp |= DC_HPDx_INT_ACK;
4832 WREG32(DC_HPD4_INT_CONTROL, tmp);
4833 }
4834 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4835 tmp = RREG32(DC_HPD5_INT_CONTROL);
4836 tmp |= DC_HPDx_INT_ACK;
4837 WREG32(DC_HPD5_INT_CONTROL, tmp);
4838 }
4839 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4840		tmp = RREG32(DC_HPD6_INT_CONTROL);
4841 tmp |= DC_HPDx_INT_ACK;
4842 WREG32(DC_HPD6_INT_CONTROL, tmp);
4843 }
4844}
4845
4846/**
4847 * cik_irq_disable - disable interrupts
4848 *
4849 * @rdev: radeon_device pointer
4850 *
4851 * Disable interrupts on the hw (CIK).
4852 */
4853static void cik_irq_disable(struct radeon_device *rdev)
4854{
4855 cik_disable_interrupts(rdev);
4856 /* Wait and acknowledge irq */
4857 mdelay(1);
4858 cik_irq_ack(rdev);
4859 cik_disable_interrupt_state(rdev);
4860}
4861
4862/**
4863 * cik_irq_suspend - disable interrupts for suspend
4864 *
4865 * @rdev: radeon_device pointer
4866 *
4867 * Disable interrupts and stop the RLC (CIK).
4868 * Used for suspend.
4869 */
4870static void cik_irq_suspend(struct radeon_device *rdev)
4871{
4872 cik_irq_disable(rdev);
4873 cik_rlc_stop(rdev);
4874}
4875
4876/**
4877 * cik_irq_fini - tear down interrupt support
4878 *
4879 * @rdev: radeon_device pointer
4880 *
4881 * Disable interrupts on the hw and free the IH ring
4882 * buffer (CIK).
4883 * Used for driver unload.
4884 */
4885static void cik_irq_fini(struct radeon_device *rdev)
4886{
4887 cik_irq_suspend(rdev);
4888 r600_ih_ring_fini(rdev);
4889}
4890
4891/**
4892 * cik_get_ih_wptr - get the IH ring buffer wptr
4893 *
4894 * @rdev: radeon_device pointer
4895 *
4896 * Get the IH ring buffer wptr from either the register
4897 * or the writeback memory buffer (CIK). Also check for
4898 * ring buffer overflow and deal with it.
4899 * Used by cik_irq_process().
4900 * Returns the value of the wptr.
4901 */
4902static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
4903{
4904 u32 wptr, tmp;
4905
4906 if (rdev->wb.enabled)
4907 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4908 else
4909 wptr = RREG32(IH_RB_WPTR);
4910
4911 if (wptr & RB_OVERFLOW) {
4912		/* When a ring buffer overflow happens, start parsing interrupts
4913		 * from the last vector that was not overwritten (wptr + 16).
4914		 * Hopefully this should allow us to catch up.
4915 */
4916 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4917			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
4918 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4919 tmp = RREG32(IH_RB_CNTL);
4920 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4921 WREG32(IH_RB_CNTL, tmp);
4922 }
4923 return (wptr & rdev->ih.ptr_mask);
4924}
4925
4926/* CIK IV Ring
4927 * Each IV ring entry is 128 bits:
4928 * [7:0] - interrupt source id
4929 * [31:8] - reserved
4930 * [59:32] - interrupt source data
4931 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04004932 * [71:64] - RINGID
4933 * CP:
4934 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05004935 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
4936 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
4937 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
4938 * PIPE_ID - ME0 0=3D
4939 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04004940 * SDMA:
4941 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
4942 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
4943 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05004944 * [79:72] - VMID
4945 * [95:80] - PASID
4946 * [127:96] - reserved
4947 */
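/*
 * Illustrative sketch only (not part of the driver): decoding one 128-bit IV
 * ring entry laid out as documented above.  The struct and function names are
 * hypothetical; cik_irq_process() below extracts the same fields inline.
 */
#if 0
struct cik_iv_entry_example {
	u8 src_id;
	u32 src_data;
	u8 ring_id;
	u8 vmid;
	u16 pasid;
};

static void cik_decode_iv_entry_example(const u32 dw[4],
					struct cik_iv_entry_example *e)
{
	e->src_id   = dw[0] & 0xff;		/* bits [7:0]   */
	e->src_data = dw[1] & 0xfffffff;	/* bits [59:32] */
	e->ring_id  = dw[2] & 0xff;		/* bits [71:64] */
	e->vmid     = (dw[2] >> 8) & 0xff;	/* bits [79:72] */
	e->pasid    = (dw[2] >> 16) & 0xffff;	/* bits [95:80] */
}
#endif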
4948/**
4949 * cik_irq_process - interrupt handler
4950 *
4951 * @rdev: radeon_device pointer
4952 *
4953 * Interrupt handler (CIK). Walk the IH ring,
4954 * ack interrupts and schedule work to handle
4955 * interrupt events.
4956 * Returns irq process return code.
4957 */
4958int cik_irq_process(struct radeon_device *rdev)
4959{
4960 u32 wptr;
4961 u32 rptr;
4962 u32 src_id, src_data, ring_id;
4963 u8 me_id, pipe_id, queue_id;
4964 u32 ring_index;
4965 bool queue_hotplug = false;
4966 bool queue_reset = false;
4967
4968 if (!rdev->ih.enabled || rdev->shutdown)
4969 return IRQ_NONE;
4970
4971 wptr = cik_get_ih_wptr(rdev);
4972
4973restart_ih:
4974 /* is somebody else already processing irqs? */
4975 if (atomic_xchg(&rdev->ih.lock, 1))
4976 return IRQ_NONE;
4977
4978 rptr = rdev->ih.rptr;
4979 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4980
4981 /* Order reading of wptr vs. reading of IH ring data */
4982 rmb();
4983
4984 /* display interrupts */
4985 cik_irq_ack(rdev);
4986
4987 while (rptr != wptr) {
4988 /* wptr/rptr are in bytes! */
4989 ring_index = rptr / 4;
4990 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4991 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4992 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05004993
4994 switch (src_id) {
4995 case 1: /* D1 vblank/vline */
4996 switch (src_data) {
4997 case 0: /* D1 vblank */
4998 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
4999 if (rdev->irq.crtc_vblank_int[0]) {
5000 drm_handle_vblank(rdev->ddev, 0);
5001 rdev->pm.vblank_sync = true;
5002 wake_up(&rdev->irq.vblank_queue);
5003 }
5004 if (atomic_read(&rdev->irq.pflip[0]))
5005 radeon_crtc_handle_flip(rdev, 0);
5006 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5007 DRM_DEBUG("IH: D1 vblank\n");
5008 }
5009 break;
5010 case 1: /* D1 vline */
5011 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5012 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5013 DRM_DEBUG("IH: D1 vline\n");
5014 }
5015 break;
5016 default:
5017 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5018 break;
5019 }
5020 break;
5021 case 2: /* D2 vblank/vline */
5022 switch (src_data) {
5023 case 0: /* D2 vblank */
5024 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5025 if (rdev->irq.crtc_vblank_int[1]) {
5026 drm_handle_vblank(rdev->ddev, 1);
5027 rdev->pm.vblank_sync = true;
5028 wake_up(&rdev->irq.vblank_queue);
5029 }
5030 if (atomic_read(&rdev->irq.pflip[1]))
5031 radeon_crtc_handle_flip(rdev, 1);
5032 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5033 DRM_DEBUG("IH: D2 vblank\n");
5034 }
5035 break;
5036 case 1: /* D2 vline */
5037 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5038 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5039 DRM_DEBUG("IH: D2 vline\n");
5040 }
5041 break;
5042 default:
5043 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5044 break;
5045 }
5046 break;
5047 case 3: /* D3 vblank/vline */
5048 switch (src_data) {
5049 case 0: /* D3 vblank */
5050 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5051 if (rdev->irq.crtc_vblank_int[2]) {
5052 drm_handle_vblank(rdev->ddev, 2);
5053 rdev->pm.vblank_sync = true;
5054 wake_up(&rdev->irq.vblank_queue);
5055 }
5056 if (atomic_read(&rdev->irq.pflip[2]))
5057 radeon_crtc_handle_flip(rdev, 2);
5058 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5059 DRM_DEBUG("IH: D3 vblank\n");
5060 }
5061 break;
5062 case 1: /* D3 vline */
5063 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5064 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5065 DRM_DEBUG("IH: D3 vline\n");
5066 }
5067 break;
5068 default:
5069 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5070 break;
5071 }
5072 break;
5073 case 4: /* D4 vblank/vline */
5074 switch (src_data) {
5075 case 0: /* D4 vblank */
5076 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5077 if (rdev->irq.crtc_vblank_int[3]) {
5078 drm_handle_vblank(rdev->ddev, 3);
5079 rdev->pm.vblank_sync = true;
5080 wake_up(&rdev->irq.vblank_queue);
5081 }
5082 if (atomic_read(&rdev->irq.pflip[3]))
5083 radeon_crtc_handle_flip(rdev, 3);
5084 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5085 DRM_DEBUG("IH: D4 vblank\n");
5086 }
5087 break;
5088 case 1: /* D4 vline */
5089 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5090 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5091 DRM_DEBUG("IH: D4 vline\n");
5092 }
5093 break;
5094 default:
5095 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5096 break;
5097 }
5098 break;
5099 case 5: /* D5 vblank/vline */
5100 switch (src_data) {
5101 case 0: /* D5 vblank */
5102 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5103 if (rdev->irq.crtc_vblank_int[4]) {
5104 drm_handle_vblank(rdev->ddev, 4);
5105 rdev->pm.vblank_sync = true;
5106 wake_up(&rdev->irq.vblank_queue);
5107 }
5108 if (atomic_read(&rdev->irq.pflip[4]))
5109 radeon_crtc_handle_flip(rdev, 4);
5110 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5111 DRM_DEBUG("IH: D5 vblank\n");
5112 }
5113 break;
5114 case 1: /* D5 vline */
5115 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5116 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5117 DRM_DEBUG("IH: D5 vline\n");
5118 }
5119 break;
5120 default:
5121 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5122 break;
5123 }
5124 break;
5125 case 6: /* D6 vblank/vline */
5126 switch (src_data) {
5127 case 0: /* D6 vblank */
5128 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5129 if (rdev->irq.crtc_vblank_int[5]) {
5130 drm_handle_vblank(rdev->ddev, 5);
5131 rdev->pm.vblank_sync = true;
5132 wake_up(&rdev->irq.vblank_queue);
5133 }
5134 if (atomic_read(&rdev->irq.pflip[5]))
5135 radeon_crtc_handle_flip(rdev, 5);
5136 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5137 DRM_DEBUG("IH: D6 vblank\n");
5138 }
5139 break;
5140 case 1: /* D6 vline */
5141 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5142 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5143 DRM_DEBUG("IH: D6 vline\n");
5144 }
5145 break;
5146 default:
5147 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5148 break;
5149 }
5150 break;
5151 case 42: /* HPD hotplug */
5152 switch (src_data) {
5153 case 0:
5154 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5155 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5156 queue_hotplug = true;
5157 DRM_DEBUG("IH: HPD1\n");
5158 }
5159 break;
5160 case 1:
5161 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5162 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5163 queue_hotplug = true;
5164 DRM_DEBUG("IH: HPD2\n");
5165 }
5166 break;
5167 case 2:
5168 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5169 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5170 queue_hotplug = true;
5171 DRM_DEBUG("IH: HPD3\n");
5172 }
5173 break;
5174 case 3:
5175 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5176 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5177 queue_hotplug = true;
5178 DRM_DEBUG("IH: HPD4\n");
5179 }
5180 break;
5181 case 4:
5182 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5183 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5184 queue_hotplug = true;
5185 DRM_DEBUG("IH: HPD5\n");
5186 }
5187 break;
5188 case 5:
5189 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5190 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5191 queue_hotplug = true;
5192 DRM_DEBUG("IH: HPD6\n");
5193 }
5194 break;
5195 default:
5196 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5197 break;
5198 }
5199 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04005200 case 146:
5201 case 147:
5202 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5203 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5204 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5205 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5206 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5207 /* reset addr and status */
5208 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5209 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005210 case 176: /* GFX RB CP_INT */
5211 case 177: /* GFX IB CP_INT */
5212 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5213 break;
5214 case 181: /* CP EOP event */
5215 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005216 /* XXX check the bitfield order! */
5217 me_id = (ring_id & 0x60) >> 5;
5218 pipe_id = (ring_id & 0x18) >> 3;
5219 queue_id = (ring_id & 0x7) >> 0;
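/* Assumed ring_id layout, per the masks above (the XXX note flags that
 * the bitfield order is unverified): me in bits [6:5], pipe in bits [4:3],
 * queue in bits [2:0].
 */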
Alex Deuchera59781b2012-11-09 10:45:57 -05005220 switch (me_id) {
5221 case 0:
5222 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5223 break;
5224 case 1:
5225 /* XXX compute */
5226 break;
5227 case 2:
5228 /* XXX compute */
5229 break;
5230 }
5231 break;
5232 case 184: /* CP Privileged reg access */
5233 DRM_ERROR("Illegal register access in command stream\n");
5234 /* XXX check the bitfield order! */
5235 me_id = (ring_id & 0x60) >> 5;
5236 pipe_id = (ring_id & 0x18) >> 3;
5237 queue_id = (ring_id & 0x7) >> 0;
5238 switch (me_id) {
5239 case 0:
5240 /* This results in a full GPU reset, but all we need to do is soft
5241 * reset the CP for gfx
5242 */
5243 queue_reset = true;
5244 break;
5245 case 1:
5246 /* XXX compute */
5247 break;
5248 case 2:
5249 /* XXX compute */
5250 break;
5251 }
5252 break;
5253 case 185: /* CP Privileged inst */
5254 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005255 /* XXX check the bitfield order! */
5256 me_id = (ring_id & 0x60) >> 5;
5257 pipe_id = (ring_id & 0x18) >> 3;
5258 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005259 switch (me_id) {
5260 case 0:
5261 /* This results in a full GPU reset, but all we need to do is soft
5262 * reset the CP for gfx
5263 */
5264 queue_reset = true;
5265 break;
5266 case 1:
5267 /* XXX compute */
5268 break;
5269 case 2:
5270 /* XXX compute */
5271 break;
5272 }
5273 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04005274 case 224: /* SDMA trap event */
5275 /* XXX check the bitfield order! */
5276 me_id = (ring_id & 0x3) >> 0;
5277 queue_id = (ring_id & 0xc) >> 2;
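/* Assumed ring_id layout for SDMA, per the masks above: me in bits [1:0],
 * queue in bits [3:2].
 */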
5278 DRM_DEBUG("IH: SDMA trap\n");
5279 switch (me_id) {
5280 case 0:
5281 switch (queue_id) {
5282 case 0:
5283 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5284 break;
5285 case 1:
5286 /* XXX compute */
5287 break;
5288 case 2:
5289 /* XXX compute */
5290 break;
5291 }
5292 break;
5293 case 1:
5294 switch (queue_id) {
5295 case 0:
5296 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5297 break;
5298 case 1:
5299 /* XXX compute */
5300 break;
5301 case 2:
5302 /* XXX compute */
5303 break;
5304 }
5305 break;
5306 }
5307 break;
5308 case 241: /* SDMA Privileged inst */
5309 case 247: /* SDMA Privileged inst */
5310 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5311 /* XXX check the bitfield order! */
5312 me_id = (ring_id & 0x3) >> 0;
5313 queue_id = (ring_id & 0xc) >> 2;
5314 switch (me_id) {
5315 case 0:
5316 switch (queue_id) {
5317 case 0:
5318 queue_reset = true;
5319 break;
5320 case 1:
5321 /* XXX compute */
5322 queue_reset = true;
5323 break;
5324 case 2:
5325 /* XXX compute */
5326 queue_reset = true;
5327 break;
5328 }
5329 break;
5330 case 1:
5331 switch (queue_id) {
5332 case 0:
5333 queue_reset = true;
5334 break;
5335 case 1:
5336 /* XXX compute */
5337 queue_reset = true;
5338 break;
5339 case 2:
5340 /* XXX compute */
5341 queue_reset = true;
5342 break;
5343 }
5344 break;
5345 }
5346 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005347 case 233: /* GUI IDLE */
5348 DRM_DEBUG("IH: GUI idle\n");
5349 break;
5350 default:
5351 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5352 break;
5353 }
5354
5355 /* wptr/rptr are in bytes! */
5356 rptr += 16;
5357 rptr &= rdev->ih.ptr_mask;
5358 }
5359 if (queue_hotplug)
5360 schedule_work(&rdev->hotplug_work);
5361 if (queue_reset)
5362 schedule_work(&rdev->reset_work);
5363 rdev->ih.rptr = rptr;
5364 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5365 atomic_set(&rdev->ih.lock, 0);
5366
5367 /* make sure wptr hasn't changed while processing */
5368 wptr = cik_get_ih_wptr(rdev);
5369 if (wptr != rptr)
5370 goto restart_ih;
5371
5372 return IRQ_HANDLED;
5373}
Alex Deucher7bf94a22012-08-17 11:48:29 -04005374
5375/*
5376 * startup/shutdown callbacks
5377 */
5378/**
5379 * cik_startup - program the asic to a functional state
5380 *
5381 * @rdev: radeon_device pointer
5382 *
5383 * Programs the asic to a functional state (CIK).
5384 * Called by cik_init() and cik_resume().
5385 * Returns 0 for success, error for failure.
5386 */
5387static int cik_startup(struct radeon_device *rdev)
5388{
5389 struct radeon_ring *ring;
5390 int r;
5391
5392 if (rdev->flags & RADEON_IS_IGP) {
5393 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5394 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5395 r = cik_init_microcode(rdev);
5396 if (r) {
5397 DRM_ERROR("Failed to load firmware!\n");
5398 return r;
5399 }
5400 }
5401 } else {
5402 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5403 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5404 !rdev->mc_fw) {
5405 r = cik_init_microcode(rdev);
5406 if (r) {
5407 DRM_ERROR("Failed to load firmware!\n");
5408 return r;
5409 }
5410 }
5411
5412 r = ci_mc_load_microcode(rdev);
5413 if (r) {
5414 DRM_ERROR("Failed to load MC firmware!\n");
5415 return r;
5416 }
5417 }
5418
5419 r = r600_vram_scratch_init(rdev);
5420 if (r)
5421 return r;
5422
5423 cik_mc_program(rdev);
5424 r = cik_pcie_gart_enable(rdev);
5425 if (r)
5426 return r;
5427 cik_gpu_init(rdev);
5428
5429 /* allocate rlc buffers */
5430 r = si_rlc_init(rdev);
5431 if (r) {
5432 DRM_ERROR("Failed to init rlc BOs!\n");
5433 return r;
5434 }
5435
5436 /* allocate wb buffer */
5437 r = radeon_wb_init(rdev);
5438 if (r)
5439 return r;
5440
Alex Deucher963e81f2013-06-26 17:37:11 -04005441 /* allocate mec buffers */
5442 r = cik_mec_init(rdev);
5443 if (r) {
5444 DRM_ERROR("Failed to init MEC BOs!\n");
5445 return r;
5446 }
5447
Alex Deucher7bf94a22012-08-17 11:48:29 -04005448 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5449 if (r) {
5450 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5451 return r;
5452 }
5453
Alex Deucher963e81f2013-06-26 17:37:11 -04005454 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5455 if (r) {
5456 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5457 return r;
5458 }
5459
5460 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5461 if (r) {
5462 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5463 return r;
5464 }
5465
Alex Deucher7bf94a22012-08-17 11:48:29 -04005466 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
5467 if (r) {
5468 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5469 return r;
5470 }
5471
5472 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5473 if (r) {
5474 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5475 return r;
5476 }
5477
Christian König87167bb2013-04-09 13:39:21 -04005478 r = cik_uvd_resume(rdev);
5479 if (!r) {
5480 r = radeon_fence_driver_start_ring(rdev,
5481 R600_RING_TYPE_UVD_INDEX);
5482 if (r)
5483 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
5484 }
5485 if (r)
5486 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
5487
Alex Deucher7bf94a22012-08-17 11:48:29 -04005488 /* Enable IRQ */
5489 if (!rdev->irq.installed) {
5490 r = radeon_irq_kms_init(rdev);
5491 if (r)
5492 return r;
5493 }
5494
5495 r = cik_irq_init(rdev);
5496 if (r) {
5497 DRM_ERROR("radeon: IH init failed (%d).\n", r);
5498 radeon_irq_kms_fini(rdev);
5499 return r;
5500 }
5501 cik_irq_set(rdev);
5502
5503 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5504 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
5505 CP_RB0_RPTR, CP_RB0_WPTR,
5506 0, 0xfffff, RADEON_CP_PACKET2);
5507 if (r)
5508 return r;
5509
Alex Deucher963e81f2013-06-26 17:37:11 -04005510 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04005511 /* type-2 packets are deprecated on MEC, use type-3 instead */
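/* Note: the last radeon_ring_init() argument is the dword used to pad the
 * ring, so a type-3 NOP packet is passed here instead of the type-2
 * RADEON_CP_PACKET2 used for the gfx ring above.
 */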
Alex Deucher963e81f2013-06-26 17:37:11 -04005512 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5513 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
5514 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04005515 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04005516 if (r)
5517 return r;
5518 ring->me = 1; /* first MEC */
5519 ring->pipe = 0; /* first pipe */
5520 ring->queue = 0; /* first queue */
5521 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
5522
Alex Deucher2615b532013-06-03 11:21:58 -04005523 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04005524 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5525 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
5526 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04005527 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04005528 if (r)
5529 return r;
5530 /* dGPUs only have 1 MEC */
5531 ring->me = 1; /* first MEC */
5532 ring->pipe = 0; /* first pipe */
5533 ring->queue = 1; /* second queue */
5534 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
5535
Alex Deucher7bf94a22012-08-17 11:48:29 -04005536 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5537 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
5538 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
5539 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
5540 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
5541 if (r)
5542 return r;
5543
5544 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5545 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
5546 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
5547 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
5548 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
5549 if (r)
5550 return r;
5551
5552 r = cik_cp_resume(rdev);
5553 if (r)
5554 return r;
5555
5556 r = cik_sdma_resume(rdev);
5557 if (r)
5558 return r;
5559
Christian König87167bb2013-04-09 13:39:21 -04005560 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5561 if (ring->ring_size) {
5562 r = radeon_ring_init(rdev, ring, ring->ring_size,
5563 R600_WB_UVD_RPTR_OFFSET,
5564 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
5565 0, 0xfffff, RADEON_CP_PACKET2);
5566 if (!r)
5567 r = r600_uvd_init(rdev);
5568 if (r)
5569 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5570 }
5571
Alex Deucher7bf94a22012-08-17 11:48:29 -04005572 r = radeon_ib_pool_init(rdev);
5573 if (r) {
5574 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5575 return r;
5576 }
5577
5578 r = radeon_vm_manager_init(rdev);
5579 if (r) {
5580 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5581 return r;
5582 }
5583
5584 return 0;
5585}
5586
5587/**
5588 * cik_resume - resume the asic to a functional state
5589 *
5590 * @rdev: radeon_device pointer
5591 *
5592 * Programs the asic to a functional state (CIK).
5593 * Called at resume.
5594 * Returns 0 for success, error for failure.
5595 */
5596int cik_resume(struct radeon_device *rdev)
5597{
5598 int r;
5599
5600 /* post card */
5601 atom_asic_init(rdev->mode_info.atom_context);
5602
5603 rdev->accel_working = true;
5604 r = cik_startup(rdev);
5605 if (r) {
5606 DRM_ERROR("cik startup failed on resume\n");
5607 rdev->accel_working = false;
5608 return r;
5609 }
5610
5611 return r;
5612
5613}
5614
5615/**
5616 * cik_suspend - suspend the asic
5617 *
5618 * @rdev: radeon_device pointer
5619 *
5620 * Bring the chip into a state suitable for suspend (CIK).
5621 * Called at suspend.
5622 * Returns 0 for success.
5623 */
5624int cik_suspend(struct radeon_device *rdev)
5625{
5626 radeon_vm_manager_fini(rdev);
5627 cik_cp_enable(rdev, false);
5628 cik_sdma_enable(rdev, false);
Christian König87167bb2013-04-09 13:39:21 -04005629 r600_uvd_rbc_stop(rdev);
5630 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005631 cik_irq_suspend(rdev);
5632 radeon_wb_disable(rdev);
5633 cik_pcie_gart_disable(rdev);
5634 return 0;
5635}
5636
5637/* The plan is to move initialization into this function and use
5638 * helper functions so that radeon_device_init does pretty much
5639 * nothing more than call asic specific functions. This
5640 * should also allow removing a bunch of callback functions
5641 * like vram_info.
5642 */
5643/**
5644 * cik_init - asic specific driver and hw init
5645 *
5646 * @rdev: radeon_device pointer
5647 *
5648 * Setup asic specific driver variables and program the hw
5649 * to a functional state (CIK).
5650 * Called at driver startup.
5651 * Returns 0 for success, errors for failure.
5652 */
5653int cik_init(struct radeon_device *rdev)
5654{
5655 struct radeon_ring *ring;
5656 int r;
5657
5658 /* Read BIOS */
5659 if (!radeon_get_bios(rdev)) {
5660 if (ASIC_IS_AVIVO(rdev))
5661 return -EINVAL;
5662 }
5663 /* Must be an ATOMBIOS */
5664 if (!rdev->is_atom_bios) {
5665 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5666 return -EINVAL;
5667 }
5668 r = radeon_atombios_init(rdev);
5669 if (r)
5670 return r;
5671
5672 /* Post card if necessary */
5673 if (!radeon_card_posted(rdev)) {
5674 if (!rdev->bios) {
5675 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5676 return -EINVAL;
5677 }
5678 DRM_INFO("GPU not posted. posting now...\n");
5679 atom_asic_init(rdev->mode_info.atom_context);
5680 }
5681 /* Initialize scratch registers */
5682 cik_scratch_init(rdev);
5683 /* Initialize surface registers */
5684 radeon_surface_init(rdev);
5685 /* Initialize clocks */
5686 radeon_get_clock_info(rdev->ddev);
5687
5688 /* Fence driver */
5689 r = radeon_fence_driver_init(rdev);
5690 if (r)
5691 return r;
5692
5693 /* initialize memory controller */
5694 r = cik_mc_init(rdev);
5695 if (r)
5696 return r;
5697 /* Memory manager */
5698 r = radeon_bo_init(rdev);
5699 if (r)
5700 return r;
5701
5702 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5703 ring->ring_obj = NULL;
5704 r600_ring_init(rdev, ring, 1024 * 1024);
5705
Alex Deucher963e81f2013-06-26 17:37:11 -04005706 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5707 ring->ring_obj = NULL;
5708 r600_ring_init(rdev, ring, 1024 * 1024);
5709 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
5710 if (r)
5711 return r;
5712
5713 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5714 ring->ring_obj = NULL;
5715 r600_ring_init(rdev, ring, 1024 * 1024);
5716 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
5717 if (r)
5718 return r;
5719
Alex Deucher7bf94a22012-08-17 11:48:29 -04005720 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5721 ring->ring_obj = NULL;
5722 r600_ring_init(rdev, ring, 256 * 1024);
5723
5724 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5725 ring->ring_obj = NULL;
5726 r600_ring_init(rdev, ring, 256 * 1024);
5727
Christian König87167bb2013-04-09 13:39:21 -04005728 r = radeon_uvd_init(rdev);
5729 if (!r) {
5730 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5731 ring->ring_obj = NULL;
5732 r600_ring_init(rdev, ring, 4096);
5733 }
5734
Alex Deucher7bf94a22012-08-17 11:48:29 -04005735 rdev->ih.ring_obj = NULL;
5736 r600_ih_ring_init(rdev, 64 * 1024);
5737
5738 r = r600_pcie_gart_init(rdev);
5739 if (r)
5740 return r;
5741
5742 rdev->accel_working = true;
5743 r = cik_startup(rdev);
5744 if (r) {
5745 dev_err(rdev->dev, "disabling GPU acceleration\n");
5746 cik_cp_fini(rdev);
5747 cik_sdma_fini(rdev);
5748 cik_irq_fini(rdev);
5749 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04005750 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005751 radeon_wb_fini(rdev);
5752 radeon_ib_pool_fini(rdev);
5753 radeon_vm_manager_fini(rdev);
5754 radeon_irq_kms_fini(rdev);
5755 cik_pcie_gart_fini(rdev);
5756 rdev->accel_working = false;
5757 }
5758
5759 /* Don't start up if the MC ucode is missing.
5760 * The default clocks and voltages before the MC ucode
5761 * is loaded are not sufficient for advanced operations.
5762 */
5763 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
5764 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5765 return -EINVAL;
5766 }
5767
5768 return 0;
5769}
5770
5771/**
5772 * cik_fini - asic specific driver and hw fini
5773 *
5774 * @rdev: radeon_device pointer
5775 *
5776 * Tear down the asic specific driver variables and program the hw
5777 * to an idle state (CIK).
5778 * Called at driver unload.
5779 */
5780void cik_fini(struct radeon_device *rdev)
5781{
5782 cik_cp_fini(rdev);
5783 cik_sdma_fini(rdev);
5784 cik_irq_fini(rdev);
5785 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04005786 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005787 radeon_wb_fini(rdev);
5788 radeon_vm_manager_fini(rdev);
5789 radeon_ib_pool_fini(rdev);
5790 radeon_irq_kms_fini(rdev);
Christian König87167bb2013-04-09 13:39:21 -04005791 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005792 cik_pcie_gart_fini(rdev);
5793 r600_vram_scratch_fini(rdev);
5794 radeon_gem_fini(rdev);
5795 radeon_fence_driver_fini(rdev);
5796 radeon_bo_fini(rdev);
5797 radeon_atombios_fini(rdev);
5798 kfree(rdev->bios);
5799 rdev->bios = NULL;
5800}
Alex Deuchercd84a272012-07-20 17:13:13 -04005801
5802/* display watermark setup */
5803/**
5804 * dce8_line_buffer_adjust - Set up the line buffer
5805 *
5806 * @rdev: radeon_device pointer
5807 * @radeon_crtc: the selected display controller
5808 * @mode: the current display mode on the selected display
5809 * controller
5810 *
5811 * Set up the line buffer allocation for
5812 * the selected display controller (CIK).
5813 * Returns the line buffer size in pixels.
5814 */
5815static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5816 struct radeon_crtc *radeon_crtc,
5817 struct drm_display_mode *mode)
5818{
5819 u32 tmp;
5820
5821 /*
5822 * Line Buffer Setup
5823 * There are 6 line buffers, one for each display controller.
5824 * There are 3 partitions per LB. Select the number of partitions
5825 * to enable based on the display width. For display widths larger
5826 * than 4096, you need to use 2 display controllers and combine
5827 * them using the stereo blender.
5828 */
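/* For example, following the thresholds below: a 1920-pixel-wide mode
 * selects config 2 and returns 2560 * 2 pixels of line buffer, while a
 * 2560-pixel-wide mode selects config 0 and returns 4096 * 2 pixels.
 */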
5829 if (radeon_crtc->base.enabled && mode) {
5830 if (mode->crtc_hdisplay < 1920)
5831 tmp = 1;
5832 else if (mode->crtc_hdisplay < 2560)
5833 tmp = 2;
5834 else if (mode->crtc_hdisplay < 4096)
5835 tmp = 0;
5836 else {
5837 DRM_DEBUG_KMS("Mode too big for LB!\n");
5838 tmp = 0;
5839 }
5840 } else
5841 tmp = 1;
5842
5843 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5844 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5845
5846 if (radeon_crtc->base.enabled && mode) {
5847 switch (tmp) {
5848 case 0:
5849 default:
5850 return 4096 * 2;
5851 case 1:
5852 return 1920 * 2;
5853 case 2:
5854 return 2560 * 2;
5855 }
5856 }
5857
5858 /* controller not enabled, so no lb used */
5859 return 0;
5860}
5861
5862/**
5863 * cik_get_number_of_dram_channels - get the number of dram channels
5864 *
5865 * @rdev: radeon_device pointer
5866 *
5867 * Look up the number of video ram channels (CIK).
5868 * Used for display watermark bandwidth calculations
5869 * Returns the number of dram channels
5870 */
5871static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5872{
5873 u32 tmp = RREG32(MC_SHARED_CHMAP);
5874
5875 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5876 case 0:
5877 default:
5878 return 1;
5879 case 1:
5880 return 2;
5881 case 2:
5882 return 4;
5883 case 3:
5884 return 8;
5885 case 4:
5886 return 3;
5887 case 5:
5888 return 6;
5889 case 6:
5890 return 10;
5891 case 7:
5892 return 12;
5893 case 8:
5894 return 16;
5895 }
5896}
5897
5898struct dce8_wm_params {
5899 u32 dram_channels; /* number of dram channels */
5900 u32 yclk; /* bandwidth per dram data pin in kHz */
5901 u32 sclk; /* engine clock in kHz */
5902 u32 disp_clk; /* display clock in kHz */
5903 u32 src_width; /* viewport width */
5904 u32 active_time; /* active display time in ns */
5905 u32 blank_time; /* blank time in ns */
5906 bool interlaced; /* mode is interlaced */
5907 fixed20_12 vsc; /* vertical scale ratio */
5908 u32 num_heads; /* number of active crtcs */
5909 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
5910 u32 lb_size; /* line buffer allocated to pipe */
5911 u32 vtaps; /* vertical scaler taps */
5912};
5913
5914/**
5915 * dce8_dram_bandwidth - get the dram bandwidth
5916 *
5917 * @wm: watermark calculation data
5918 *
5919 * Calculate the raw dram bandwidth (CIK).
5920 * Used for display watermark bandwidth calculations
5921 * Returns the dram bandwidth in MBytes/s
5922 */
5923static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
5924{
5925 /* Calculate raw DRAM Bandwidth */
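/* i.e. roughly: channels * 4 bytes * yclk (MHz) * 0.7 efficiency. With
 * illustrative numbers, 4 channels at yclk = 1000000 kHz give
 * 4 * 4 * 1000 * 0.7 = 11200 MBytes/s.
 */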
5926 fixed20_12 dram_efficiency; /* 0.7 */
5927 fixed20_12 yclk, dram_channels, bandwidth;
5928 fixed20_12 a;
5929
5930 a.full = dfixed_const(1000);
5931 yclk.full = dfixed_const(wm->yclk);
5932 yclk.full = dfixed_div(yclk, a);
5933 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5934 a.full = dfixed_const(10);
5935 dram_efficiency.full = dfixed_const(7);
5936 dram_efficiency.full = dfixed_div(dram_efficiency, a);
5937 bandwidth.full = dfixed_mul(dram_channels, yclk);
5938 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
5939
5940 return dfixed_trunc(bandwidth);
5941}
5942
5943/**
5944 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
5945 *
5946 * @wm: watermark calculation data
5947 *
5948 * Calculate the dram bandwidth used for display (CIK).
5949 * Used for display watermark bandwidth calculations
5950 * Returns the dram bandwidth for display in MBytes/s
5951 */
5952static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5953{
5954 /* Calculate DRAM Bandwidth and the part allocated to display. */
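/* Same formula as dce8_dram_bandwidth(), but scaled by the display
 * allocation (0.3 assumed as the worst case): the illustrative 4-channel,
 * 1 GHz example would yield 4 * 4 * 1000 * 0.3 = 4800 MBytes/s.
 */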
5955 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
5956 fixed20_12 yclk, dram_channels, bandwidth;
5957 fixed20_12 a;
5958
5959 a.full = dfixed_const(1000);
5960 yclk.full = dfixed_const(wm->yclk);
5961 yclk.full = dfixed_div(yclk, a);
5962 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5963 a.full = dfixed_const(10);
5964 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
5965 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
5966 bandwidth.full = dfixed_mul(dram_channels, yclk);
5967 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
5968
5969 return dfixed_trunc(bandwidth);
5970}
5971
5972/**
5973 * dce8_data_return_bandwidth - get the data return bandwidth
5974 *
5975 * @wm: watermark calculation data
5976 *
5977 * Calculate the data return bandwidth used for display (CIK).
5978 * Used for display watermark bandwidth calculations
5979 * Returns the data return bandwidth in MBytes/s
5980 */
5981static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
5982{
5983 /* Calculate the display Data return Bandwidth */
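/* i.e. 32 bytes per return * sclk (MHz) * 0.8 efficiency; an assumed
 * sclk of 800000 kHz gives 32 * 800 * 0.8 = 20480 MBytes/s.
 */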
5984 fixed20_12 return_efficiency; /* 0.8 */
5985 fixed20_12 sclk, bandwidth;
5986 fixed20_12 a;
5987
5988 a.full = dfixed_const(1000);
5989 sclk.full = dfixed_const(wm->sclk);
5990 sclk.full = dfixed_div(sclk, a);
5991 a.full = dfixed_const(10);
5992 return_efficiency.full = dfixed_const(8);
5993 return_efficiency.full = dfixed_div(return_efficiency, a);
5994 a.full = dfixed_const(32);
5995 bandwidth.full = dfixed_mul(a, sclk);
5996 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
5997
5998 return dfixed_trunc(bandwidth);
5999}
6000
6001/**
6002 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6003 *
6004 * @wm: watermark calculation data
6005 *
6006 * Calculate the dmif bandwidth used for display (CIK).
6007 * Used for display watermark bandwidth calculations
6008 * Returns the dmif bandwidth in MBytes/s
6009 */
6010static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6011{
6012 /* Calculate the DMIF Request Bandwidth */
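/* Same shape as the data return path: 32 * disp_clk (MHz) * 0.8 efficiency. */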
6013 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6014 fixed20_12 disp_clk, bandwidth;
6015 fixed20_12 a, b;
6016
6017 a.full = dfixed_const(1000);
6018 disp_clk.full = dfixed_const(wm->disp_clk);
6019 disp_clk.full = dfixed_div(disp_clk, a);
6020 a.full = dfixed_const(32);
6021 b.full = dfixed_mul(a, disp_clk);
6022
6023 a.full = dfixed_const(10);
6024 disp_clk_request_efficiency.full = dfixed_const(8);
6025 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6026
6027 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6028
6029 return dfixed_trunc(bandwidth);
6030}
6031
6032/**
6033 * dce8_available_bandwidth - get the min available bandwidth
6034 *
6035 * @wm: watermark calculation data
6036 *
6037 * Calculate the min available bandwidth used for display (CIK).
6038 * Used for display watermark bandwidth calculations
6039 * Returns the min available bandwidth in MBytes/s
6040 */
6041static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6042{
6043 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6044 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6045 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6046 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6047
6048 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6049}
6050
6051/**
6052 * dce8_average_bandwidth - get the average available bandwidth
6053 *
6054 * @wm: watermark calculation data
6055 *
6056 * Calculate the average available bandwidth used for display (CIK).
6057 * Used for display watermark bandwidth calculations
6058 * Returns the average available bandwidth in MBytes/s
6059 */
6060static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6061{
6062 /* Calculate the display mode Average Bandwidth
6063 * DisplayMode should contain the source and destination dimensions,
6064 * timing, etc.
6065 */
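/* Roughly: src_width * bytes_per_pixel * vsc / line_time (us). For an
 * assumed 1920-wide, 32bpp source with vsc = 1 and a 16000 ns line time,
 * this is 1920 * 4 / 16 = 480 MBytes/s.
 */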
6066 fixed20_12 bpp;
6067 fixed20_12 line_time;
6068 fixed20_12 src_width;
6069 fixed20_12 bandwidth;
6070 fixed20_12 a;
6071
6072 a.full = dfixed_const(1000);
6073 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6074 line_time.full = dfixed_div(line_time, a);
6075 bpp.full = dfixed_const(wm->bytes_per_pixel);
6076 src_width.full = dfixed_const(wm->src_width);
6077 bandwidth.full = dfixed_mul(src_width, bpp);
6078 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6079 bandwidth.full = dfixed_div(bandwidth, line_time);
6080
6081 return dfixed_trunc(bandwidth);
6082}
6083
6084/**
6085 * dce8_latency_watermark - get the latency watermark
6086 *
6087 * @wm: watermark calculation data
6088 *
6089 * Calculate the latency watermark (CIK).
6090 * Used for display watermark bandwidth calculations
6091 * Returns the latency watermark in ns
6092 */
6093static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6094{
6095 /* First calculate the latency in ns */
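/* Overall structure: latency = MC latency + worst-case chunk/cursor
 * return time for the other heads + DC pipe latency; if the line buffer
 * cannot be refilled within the active display time, the shortfall is
 * added on top (see the line_fill_time check at the end).
 */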
6096 u32 mc_latency = 2000; /* 2000 ns. */
6097 u32 available_bandwidth = dce8_available_bandwidth(wm);
6098 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6099 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6100 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6101 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6102 (wm->num_heads * cursor_line_pair_return_time);
6103 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6104 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6105 u32 tmp, dmif_size = 12288;
6106 fixed20_12 a, b, c;
6107
6108 if (wm->num_heads == 0)
6109 return 0;
6110
6111 a.full = dfixed_const(2);
6112 b.full = dfixed_const(1);
6113 if ((wm->vsc.full > a.full) ||
6114 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6115 (wm->vtaps >= 5) ||
6116 ((wm->vsc.full >= a.full) && wm->interlaced))
6117 max_src_lines_per_dst_line = 4;
6118 else
6119 max_src_lines_per_dst_line = 2;
6120
6121 a.full = dfixed_const(available_bandwidth);
6122 b.full = dfixed_const(wm->num_heads);
6123 a.full = dfixed_div(a, b);
6124
6125 b.full = dfixed_const(mc_latency + 512);
6126 c.full = dfixed_const(wm->disp_clk);
6127 b.full = dfixed_div(b, c);
6128
6129 c.full = dfixed_const(dmif_size);
6130 b.full = dfixed_div(c, b);
6131
6132 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6133
6134 b.full = dfixed_const(1000);
6135 c.full = dfixed_const(wm->disp_clk);
6136 b.full = dfixed_div(c, b);
6137 c.full = dfixed_const(wm->bytes_per_pixel);
6138 b.full = dfixed_mul(b, c);
6139
6140 lb_fill_bw = min(tmp, dfixed_trunc(b));
6141
6142 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6143 b.full = dfixed_const(1000);
6144 c.full = dfixed_const(lb_fill_bw);
6145 b.full = dfixed_div(c, b);
6146 a.full = dfixed_div(a, b);
6147 line_fill_time = dfixed_trunc(a);
6148
6149 if (line_fill_time < wm->active_time)
6150 return latency;
6151 else
6152 return latency + (line_fill_time - wm->active_time);
6153
6154}
6155
6156/**
6157 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6158 * average and available dram bandwidth
6159 *
6160 * @wm: watermark calculation data
6161 *
6162 * Check if the display average bandwidth fits in the display
6163 * dram bandwidth (CIK).
6164 * Used for display watermark bandwidth calculations
6165 * Returns true if the display fits, false if not.
6166 */
6167static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6168{
6169 if (dce8_average_bandwidth(wm) <=
6170 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6171 return true;
6172 else
6173 return false;
6174}
6175
6176/**
6177 * dce8_average_bandwidth_vs_available_bandwidth - check
6178 * average and available bandwidth
6179 *
6180 * @wm: watermark calculation data
6181 *
6182 * Check if the display average bandwidth fits in the display
6183 * available bandwidth (CIK).
6184 * Used for display watermark bandwidth calculations
6185 * Returns true if the display fits, false if not.
6186 */
6187static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6188{
6189 if (dce8_average_bandwidth(wm) <=
6190 (dce8_available_bandwidth(wm) / wm->num_heads))
6191 return true;
6192 else
6193 return false;
6194}
6195
6196/**
6197 * dce8_check_latency_hiding - check latency hiding
6198 *
6199 * @wm: watermark calculation data
6200 *
6201 * Check latency hiding (CIK).
6202 * Used for display watermark bandwidth calculations
6203 * Returns true if the display fits, false if not.
6204 */
6205static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6206{
6207 u32 lb_partitions = wm->lb_size / wm->src_width;
6208 u32 line_time = wm->active_time + wm->blank_time;
6209 u32 latency_tolerant_lines;
6210 u32 latency_hiding;
6211 fixed20_12 a;
6212
6213 a.full = dfixed_const(1);
6214 if (wm->vsc.full > a.full)
6215 latency_tolerant_lines = 1;
6216 else {
6217 if (lb_partitions <= (wm->vtaps + 1))
6218 latency_tolerant_lines = 1;
6219 else
6220 latency_tolerant_lines = 2;
6221 }
6222
6223 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6224
6225 if (dce8_latency_watermark(wm) <= latency_hiding)
6226 return true;
6227 else
6228 return false;
6229}
6230
6231/**
6232 * dce8_program_watermarks - program display watermarks
6233 *
6234 * @rdev: radeon_device pointer
6235 * @radeon_crtc: the selected display controller
6236 * @lb_size: line buffer size
6237 * @num_heads: number of display controllers in use
6238 *
6239 * Calculate and program the display watermarks for the
6240 * selected display controller (CIK).
6241 */
6242static void dce8_program_watermarks(struct radeon_device *rdev,
6243 struct radeon_crtc *radeon_crtc,
6244 u32 lb_size, u32 num_heads)
6245{
6246 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6247 struct dce8_wm_params wm;
6248 u32 pixel_period;
6249 u32 line_time = 0;
6250 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6251 u32 tmp, wm_mask;
6252
6253 if (radeon_crtc->base.enabled && num_heads && mode) {
6254 pixel_period = 1000000 / (u32)mode->clock;
6255 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6256
6257 wm.yclk = rdev->pm.current_mclk * 10;
6258 wm.sclk = rdev->pm.current_sclk * 10;
6259 wm.disp_clk = mode->clock;
6260 wm.src_width = mode->crtc_hdisplay;
6261 wm.active_time = mode->crtc_hdisplay * pixel_period;
6262 wm.blank_time = line_time - wm.active_time;
6263 wm.interlaced = false;
6264 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6265 wm.interlaced = true;
6266 wm.vsc = radeon_crtc->vsc;
6267 wm.vtaps = 1;
6268 if (radeon_crtc->rmx_type != RMX_OFF)
6269 wm.vtaps = 2;
6270 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6271 wm.lb_size = lb_size;
6272 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6273 wm.num_heads = num_heads;
6274
6275 /* set for high clocks */
6276 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6277 /* set for low clocks */
6278 /* wm.yclk = low clk; wm.sclk = low clk */
6279 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6280
6281 /* possibly force display priority to high */
6282 /* should really do this at mode validation time... */
6283 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6284 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6285 !dce8_check_latency_hiding(&wm) ||
6286 (rdev->disp_priority == 2)) {
6287 DRM_DEBUG_KMS("force priority to high\n");
6288 }
6289 }
6290
6291 /* select wm A */
6292 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6293 tmp = wm_mask;
6294 tmp &= ~LATENCY_WATERMARK_MASK(3);
6295 tmp |= LATENCY_WATERMARK_MASK(1);
6296 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6297 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6298 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6299 LATENCY_HIGH_WATERMARK(line_time)));
6300 /* select wm B */
6301 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6302 tmp &= ~LATENCY_WATERMARK_MASK(3);
6303 tmp |= LATENCY_WATERMARK_MASK(2);
6304 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6305 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6306 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6307 LATENCY_HIGH_WATERMARK(line_time)));
6308 /* restore original selection */
6309 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6310}
6311
6312/**
6313 * dce8_bandwidth_update - program display watermarks
6314 *
6315 * @rdev: radeon_device pointer
6316 *
6317 * Calculate and program the display watermarks and line
6318 * buffer allocation (CIK).
6319 */
6320void dce8_bandwidth_update(struct radeon_device *rdev)
6321{
6322 struct drm_display_mode *mode = NULL;
6323 u32 num_heads = 0, lb_size;
6324 int i;
6325
6326 radeon_update_display_priority(rdev);
6327
6328 for (i = 0; i < rdev->num_crtc; i++) {
6329 if (rdev->mode_info.crtcs[i]->base.enabled)
6330 num_heads++;
6331 }
6332 for (i = 0; i < rdev->num_crtc; i++) {
6333 mode = &rdev->mode_info.crtcs[i]->base.mode;
6334 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6335 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6336 }
6337}
Alex Deucher44fa3462012-12-18 22:17:00 -05006338
6339/**
6340 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6341 *
6342 * @rdev: radeon_device pointer
6343 *
6344 * Fetches a GPU clock counter snapshot (CIK).
6345 * Returns the 64 bit clock counter snapshot.
6346 */
6347uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6348{
6349 uint64_t clock;
6350
6351 mutex_lock(&rdev->gpu_clock_mutex);
6352 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6353 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6354 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6355 mutex_unlock(&rdev->gpu_clock_mutex);
6356 return clock;
6357}
6358
Christian König87167bb2013-04-09 13:39:21 -04006359static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6360 u32 cntl_reg, u32 status_reg)
6361{
6362 int r, i;
6363 struct atom_clock_dividers dividers;
6364 uint32_t tmp;
6365
6366 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6367 clock, false, &dividers);
6368 if (r)
6369 return r;
6370
6371 tmp = RREG32_SMC(cntl_reg);
6372 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6373 tmp |= dividers.post_divider;
6374 WREG32_SMC(cntl_reg, tmp);
6375
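/* Wait for the new divider to take effect: poll the status bit up to
 * 100 times with 10 ms delays (roughly 1 s) before timing out.
 */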
6376 for (i = 0; i < 100; i++) {
6377 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6378 break;
6379 mdelay(10);
6380 }
6381 if (i == 100)
6382 return -ETIMEDOUT;
6383
6384 return 0;
6385}
6386
6387int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6388{
6389 int r = 0;
6390
6391 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6392 if (r)
6393 return r;
6394
6395 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6396 return r;
6397}
6398
6399int cik_uvd_resume(struct radeon_device *rdev)
6400{
6401 uint64_t addr;
6402 uint32_t size;
6403 int r;
6404
6405 r = radeon_uvd_resume(rdev);
6406 if (r)
6407 return r;
6408
6409 /* program the VCPU memory controller bits 0-27 */
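/* Layout implied by the writes below (offsets and sizes are in 8-byte
 * units, hence the >> 3): firmware image, then stack, then heap, packed
 * back to back in the UVD buffer object.
 */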
6410 addr = rdev->uvd.gpu_addr >> 3;
6411 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6412 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6413 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6414
6415 addr += size;
6416 size = RADEON_UVD_STACK_SIZE >> 3;
6417 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6418 WREG32(UVD_VCPU_CACHE_SIZE1, size);
6419
6420 addr += size;
6421 size = RADEON_UVD_HEAP_SIZE >> 3;
6422 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6423 WREG32(UVD_VCPU_CACHE_SIZE2, size);
6424
6425 /* bits 28-31 */
6426 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6427 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6428
6429 /* bits 32-39 */
6430 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
6431 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
6432
6433 return 0;
6434}