1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/module.h>
28#include "drmP.h"
29#include "radeon.h"
30#include "radeon_asic.h"
31#include "cikd.h"
32#include "atom.h"
33#include "cik_blit_shaders.h"
34
35/* GFX */
36#define CIK_PFP_UCODE_SIZE 2144
37#define CIK_ME_UCODE_SIZE 2144
38#define CIK_CE_UCODE_SIZE 2144
39/* compute */
40#define CIK_MEC_UCODE_SIZE 4192
41/* interrupts */
42#define BONAIRE_RLC_UCODE_SIZE 2048
43#define KB_RLC_UCODE_SIZE 2560
44#define KV_RLC_UCODE_SIZE 2560
45/* gddr controller */
46#define CIK_MC_UCODE_SIZE 7866
47/* sdma */
48#define CIK_SDMA_UCODE_SIZE 1050
49#define CIK_SDMA_UCODE_VERSION 64
50
51MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
57MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
58MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
59MODULE_FIRMWARE("radeon/KAVERI_me.bin");
60MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
61MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
62MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
63MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
64MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
65MODULE_FIRMWARE("radeon/KABINI_me.bin");
66MODULE_FIRMWARE("radeon/KABINI_ce.bin");
67MODULE_FIRMWARE("radeon/KABINI_mec.bin");
68MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
69MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
70
71extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72extern void r600_ih_ring_fini(struct radeon_device *rdev);
73extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
74extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
75extern bool evergreen_is_display_hung(struct radeon_device *rdev);
76extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
77extern void si_rlc_fini(struct radeon_device *rdev);
78extern int si_rlc_init(struct radeon_device *rdev);
79static void cik_rlc_stop(struct radeon_device *rdev);
80
81/*
82 * Indirect registers accessor
83 */
84u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
85{
86 u32 r;
87
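/* select the register through the PCIE index port; the dummy readback posts the index write before touching the data port */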
88 WREG32(PCIE_INDEX, reg);
89 (void)RREG32(PCIE_INDEX);
90 r = RREG32(PCIE_DATA);
91 return r;
92}
93
94void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
95{
96 WREG32(PCIE_INDEX, reg);
97 (void)RREG32(PCIE_INDEX);
98 WREG32(PCIE_DATA, v);
99 (void)RREG32(PCIE_DATA);
100}
101
102/**
103 * cik_get_xclk - get the xclk
104 *
105 * @rdev: radeon_device pointer
106 *
107 * Returns the reference clock used by the gfx engine
108 * (CIK).
109 */
110u32 cik_get_xclk(struct radeon_device *rdev)
111{
112 u32 reference_clock = rdev->clock.spll.reference_freq;
113
114 if (rdev->flags & RADEON_IS_IGP) {
115 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
116 return reference_clock / 2;
117 } else {
118 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
119 return reference_clock / 4;
120 }
121 return reference_clock;
122}
123
124/**
125 * cik_mm_rdoorbell - read a doorbell dword
126 *
127 * @rdev: radeon_device pointer
128 * @offset: byte offset into the aperture
129 *
130 * Returns the value in the doorbell aperture at the
131 * requested offset (CIK).
132 */
133u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
134{
135 if (offset < rdev->doorbell.size) {
136 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
137 } else {
138 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
139 return 0;
140 }
141}
142
143/**
144 * cik_mm_wdoorbell - write a doorbell dword
145 *
146 * @rdev: radeon_device pointer
147 * @offset: byte offset into the aperture
148 * @v: value to write
149 *
150 * Writes @v to the doorbell aperture at the
151 * requested offset (CIK).
152 */
153void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
154{
155 if (offset < rdev->doorbell.size) {
156 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
157 } else {
158 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
159 }
160}
161
162#define BONAIRE_IO_MC_REGS_SIZE 36
163
164static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
165{
166 {0x00000070, 0x04400000},
167 {0x00000071, 0x80c01803},
168 {0x00000072, 0x00004004},
169 {0x00000073, 0x00000100},
170 {0x00000074, 0x00ff0000},
171 {0x00000075, 0x34000000},
172 {0x00000076, 0x08000014},
173 {0x00000077, 0x00cc08ec},
174 {0x00000078, 0x00000400},
175 {0x00000079, 0x00000000},
176 {0x0000007a, 0x04090000},
177 {0x0000007c, 0x00000000},
178 {0x0000007e, 0x4408a8e8},
179 {0x0000007f, 0x00000304},
180 {0x00000080, 0x00000000},
181 {0x00000082, 0x00000001},
182 {0x00000083, 0x00000002},
183 {0x00000084, 0xf3e4f400},
184 {0x00000085, 0x052024e3},
185 {0x00000087, 0x00000000},
186 {0x00000088, 0x01000000},
187 {0x0000008a, 0x1c0a0000},
188 {0x0000008b, 0xff010000},
189 {0x0000008d, 0xffffefff},
190 {0x0000008e, 0xfff3efff},
191 {0x0000008f, 0xfff3efbf},
192 {0x00000092, 0xf7ffffff},
193 {0x00000093, 0xffffff7f},
194 {0x00000095, 0x00101101},
195 {0x00000096, 0x00000fff},
196 {0x00000097, 0x00116fff},
197 {0x00000098, 0x60010000},
198 {0x00000099, 0x10010000},
199 {0x0000009a, 0x00006000},
200 {0x0000009b, 0x00001000},
201 {0x0000009f, 0x00b48000}
202};
203
204/**
205 * cik_srbm_select - select specific register instances
206 *
207 * @rdev: radeon_device pointer
208 * @me: selected ME (micro engine)
209 * @pipe: pipe
210 * @queue: queue
211 * @vmid: VMID
212 *
213 * Switches the currently active register instances. Some
214 * registers are instanced per VMID, others are instanced per
215 * me/pipe/queue combination.
216 */
217static void cik_srbm_select(struct radeon_device *rdev,
218 u32 me, u32 pipe, u32 queue, u32 vmid)
219{
220 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
221 MEID(me & 0x3) |
222 VMID(vmid & 0xf) |
223 QUEUEID(queue & 0x7));
224 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
225}
226
227/* ucode loading */
228/**
229 * ci_mc_load_microcode - load MC ucode into the hw
230 *
231 * @rdev: radeon_device pointer
232 *
233 * Load the GDDR MC ucode into the hw (CIK).
234 * Returns 0 on success, error on failure.
235 */
236static int ci_mc_load_microcode(struct radeon_device *rdev)
237{
238 const __be32 *fw_data;
239 u32 running, blackout = 0;
240 u32 *io_mc_regs;
241 int i, ucode_size, regs_size;
242
243 if (!rdev->mc_fw)
244 return -EINVAL;
245
246 switch (rdev->family) {
247 case CHIP_BONAIRE:
248 default:
249 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
250 ucode_size = CIK_MC_UCODE_SIZE;
251 regs_size = BONAIRE_IO_MC_REGS_SIZE;
252 break;
253 }
254
255 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
256
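/* only load the MC ucode if the MC sequencer is not already running */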
257 if (running == 0) {
258 if (running) {
259 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
260 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
261 }
262
263 /* reset the engine and set to writable */
264 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
265 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
266
267 /* load mc io regs */
268 for (i = 0; i < regs_size; i++) {
269 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
270 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
271 }
272 /* load the MC ucode */
273 fw_data = (const __be32 *)rdev->mc_fw->data;
274 for (i = 0; i < ucode_size; i++)
275 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
276
277 /* put the engine back into the active state */
278 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
279 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
280 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
281
282 /* wait for training to complete */
283 for (i = 0; i < rdev->usec_timeout; i++) {
284 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
285 break;
286 udelay(1);
287 }
288 for (i = 0; i < rdev->usec_timeout; i++) {
289 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
290 break;
291 udelay(1);
292 }
293
294 if (running)
295 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
296 }
297
298 return 0;
299}
300
301/**
302 * cik_init_microcode - load ucode images from disk
303 *
304 * @rdev: radeon_device pointer
305 *
306 * Use the firmware interface to load the ucode images into
307 * the driver (not loaded into hw).
308 * Returns 0 on success, error on failure.
309 */
310static int cik_init_microcode(struct radeon_device *rdev)
311{
312 struct platform_device *pdev;
313 const char *chip_name;
314 size_t pfp_req_size, me_req_size, ce_req_size,
315 mec_req_size, rlc_req_size, mc_req_size,
316 sdma_req_size;
317 char fw_name[30];
318 int err;
319
320 DRM_DEBUG("\n");
321
322 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
323 err = IS_ERR(pdev);
324 if (err) {
325 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
326 return -EINVAL;
327 }
328
329 switch (rdev->family) {
330 case CHIP_BONAIRE:
331 chip_name = "BONAIRE";
332 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
333 me_req_size = CIK_ME_UCODE_SIZE * 4;
334 ce_req_size = CIK_CE_UCODE_SIZE * 4;
335 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
336 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
337 mc_req_size = CIK_MC_UCODE_SIZE * 4;
338 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
339 break;
340 case CHIP_KAVERI:
341 chip_name = "KAVERI";
342 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
343 me_req_size = CIK_ME_UCODE_SIZE * 4;
344 ce_req_size = CIK_CE_UCODE_SIZE * 4;
345 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
346 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
347 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
348 break;
349 case CHIP_KABINI:
350 chip_name = "KABINI";
351 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
352 me_req_size = CIK_ME_UCODE_SIZE * 4;
353 ce_req_size = CIK_CE_UCODE_SIZE * 4;
354 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
355 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
356 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
357 break;
358 default: BUG();
359 }
360
361 DRM_INFO("Loading %s Microcode\n", chip_name);
362
363 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
364 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
365 if (err)
366 goto out;
367 if (rdev->pfp_fw->size != pfp_req_size) {
368 printk(KERN_ERR
369 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
370 rdev->pfp_fw->size, fw_name);
371 err = -EINVAL;
372 goto out;
373 }
374
375 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
376 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
377 if (err)
378 goto out;
379 if (rdev->me_fw->size != me_req_size) {
380 printk(KERN_ERR
381 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
382 rdev->me_fw->size, fw_name);
383 err = -EINVAL;
384 }
385
386 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
387 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
388 if (err)
389 goto out;
390 if (rdev->ce_fw->size != ce_req_size) {
391 printk(KERN_ERR
392 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
393 rdev->ce_fw->size, fw_name);
394 err = -EINVAL;
395 }
396
397 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
398 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
399 if (err)
400 goto out;
401 if (rdev->mec_fw->size != mec_req_size) {
402 printk(KERN_ERR
403 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
404 rdev->mec_fw->size, fw_name);
405 err = -EINVAL;
406 }
407
408 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
409 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
410 if (err)
411 goto out;
412 if (rdev->rlc_fw->size != rlc_req_size) {
413 printk(KERN_ERR
414 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
415 rdev->rlc_fw->size, fw_name);
416 err = -EINVAL;
417 }
418
419 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
420 err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
421 if (err)
422 goto out;
423 if (rdev->sdma_fw->size != sdma_req_size) {
424 printk(KERN_ERR
425 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
426 rdev->sdma_fw->size, fw_name);
427 err = -EINVAL;
428 }
429
430 /* No MC ucode on APUs */
431 if (!(rdev->flags & RADEON_IS_IGP)) {
432 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
433 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
434 if (err)
435 goto out;
436 if (rdev->mc_fw->size != mc_req_size) {
437 printk(KERN_ERR
438 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
439 rdev->mc_fw->size, fw_name);
440 err = -EINVAL;
441 }
442 }
443
444out:
445 platform_device_unregister(pdev);
446
447 if (err) {
448 if (err != -EINVAL)
449 printk(KERN_ERR
450 "cik_cp: Failed to load firmware \"%s\"\n",
451 fw_name);
452 release_firmware(rdev->pfp_fw);
453 rdev->pfp_fw = NULL;
454 release_firmware(rdev->me_fw);
455 rdev->me_fw = NULL;
456 release_firmware(rdev->ce_fw);
457 rdev->ce_fw = NULL;
458 release_firmware(rdev->rlc_fw);
459 rdev->rlc_fw = NULL;
460 release_firmware(rdev->mc_fw);
461 rdev->mc_fw = NULL;
462 }
463 return err;
464}
465
466/*
467 * Core functions
468 */
469/**
470 * cik_tiling_mode_table_init - init the hw tiling table
471 *
472 * @rdev: radeon_device pointer
473 *
474 * Starting with SI, the tiling setup is done globally in a
475 * set of 32 tiling modes. Rather than selecting each set of
476 * parameters per surface as on older asics, we just select
477 * which index in the tiling table we want to use, and the
478 * surface uses those parameters (CIK).
479 */
480static void cik_tiling_mode_table_init(struct radeon_device *rdev)
481{
482 const u32 num_tile_mode_states = 32;
483 const u32 num_secondary_tile_mode_states = 16;
484 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
485 u32 num_pipe_configs;
486 u32 num_rbs = rdev->config.cik.max_backends_per_se *
487 rdev->config.cik.max_shader_engines;
488
489 switch (rdev->config.cik.mem_row_size_in_kb) {
490 case 1:
491 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
492 break;
493 case 2:
494 default:
495 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
496 break;
497 case 4:
498 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
499 break;
500 }
501
502 num_pipe_configs = rdev->config.cik.max_tile_pipes;
503 if (num_pipe_configs > 8)
504 num_pipe_configs = 8; /* ??? */
505
506 if (num_pipe_configs == 8) {
507 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
508 switch (reg_offset) {
509 case 0:
510 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
512 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
513 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
514 break;
515 case 1:
516 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
519 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
520 break;
521 case 2:
522 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
524 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
525 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
526 break;
527 case 3:
528 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
529 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
530 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
531 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
532 break;
533 case 4:
534 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
536 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
537 TILE_SPLIT(split_equal_to_row_size));
538 break;
539 case 5:
540 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
541 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
542 break;
543 case 6:
544 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
545 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
547 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
548 break;
549 case 7:
550 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
551 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
552 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
553 TILE_SPLIT(split_equal_to_row_size));
554 break;
555 case 8:
556 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
557 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
558 break;
559 case 9:
560 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
561 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
562 break;
563 case 10:
564 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
565 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
568 break;
569 case 11:
570 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
571 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
572 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
574 break;
575 case 12:
576 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
577 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
578 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
579 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
580 break;
581 case 13:
582 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
583 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
584 break;
585 case 14:
586 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
587 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
590 break;
591 case 16:
592 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
593 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
594 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
596 break;
597 case 17:
598 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
599 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
601 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
602 break;
603 case 27:
604 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
605 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
606 break;
607 case 28:
608 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
609 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
612 break;
613 case 29:
614 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
615 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
616 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
618 break;
619 case 30:
620 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
621 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
624 break;
625 default:
626 gb_tile_moden = 0;
627 break;
628 }
629 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
630 }
631 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
632 switch (reg_offset) {
633 case 0:
634 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
635 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
636 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
637 NUM_BANKS(ADDR_SURF_16_BANK));
638 break;
639 case 1:
640 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
641 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
642 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
643 NUM_BANKS(ADDR_SURF_16_BANK));
644 break;
645 case 2:
646 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
647 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
648 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
649 NUM_BANKS(ADDR_SURF_16_BANK));
650 break;
651 case 3:
652 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
653 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
654 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
655 NUM_BANKS(ADDR_SURF_16_BANK));
656 break;
657 case 4:
658 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
659 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
660 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
661 NUM_BANKS(ADDR_SURF_8_BANK));
662 break;
663 case 5:
664 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
667 NUM_BANKS(ADDR_SURF_4_BANK));
668 break;
669 case 6:
670 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
671 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
672 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
673 NUM_BANKS(ADDR_SURF_2_BANK));
674 break;
675 case 8:
676 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
677 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
678 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
679 NUM_BANKS(ADDR_SURF_16_BANK));
680 break;
681 case 9:
682 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
683 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
684 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
685 NUM_BANKS(ADDR_SURF_16_BANK));
686 break;
687 case 10:
688 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
689 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
690 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
691 NUM_BANKS(ADDR_SURF_16_BANK));
692 break;
693 case 11:
694 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
695 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
696 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
697 NUM_BANKS(ADDR_SURF_16_BANK));
698 break;
699 case 12:
700 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
701 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
702 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
703 NUM_BANKS(ADDR_SURF_8_BANK));
704 break;
705 case 13:
706 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
707 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
708 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
709 NUM_BANKS(ADDR_SURF_4_BANK));
710 break;
711 case 14:
712 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
713 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
714 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
715 NUM_BANKS(ADDR_SURF_2_BANK));
716 break;
717 default:
718 gb_tile_moden = 0;
719 break;
720 }
721 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
722 }
723 } else if (num_pipe_configs == 4) {
724 if (num_rbs == 4) {
725 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
726 switch (reg_offset) {
727 case 0:
728 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
729 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
730 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
731 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
732 break;
733 case 1:
734 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
735 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
736 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
737 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
738 break;
739 case 2:
740 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
741 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
742 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
743 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
744 break;
745 case 3:
746 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
747 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
748 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
749 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
750 break;
751 case 4:
752 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
753 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
754 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
755 TILE_SPLIT(split_equal_to_row_size));
756 break;
757 case 5:
758 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
759 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
760 break;
761 case 6:
762 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
763 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
764 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
765 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
766 break;
767 case 7:
768 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
769 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
770 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
771 TILE_SPLIT(split_equal_to_row_size));
772 break;
773 case 8:
774 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
775 PIPE_CONFIG(ADDR_SURF_P4_16x16));
776 break;
777 case 9:
778 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
779 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
780 break;
781 case 10:
782 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
783 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
784 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
785 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
786 break;
787 case 11:
788 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
789 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
790 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
791 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
792 break;
793 case 12:
794 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
795 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
796 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
798 break;
799 case 13:
800 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
801 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
802 break;
803 case 14:
804 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
805 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
806 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
807 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
808 break;
809 case 16:
810 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
811 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
812 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
814 break;
815 case 17:
816 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
817 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
820 break;
821 case 27:
822 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
823 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
824 break;
825 case 28:
826 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
827 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
828 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
830 break;
831 case 29:
832 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
833 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
834 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
836 break;
837 case 30:
838 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
839 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
840 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
842 break;
843 default:
844 gb_tile_moden = 0;
845 break;
846 }
847 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
848 }
849 } else if (num_rbs < 4) {
850 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
851 switch (reg_offset) {
852 case 0:
853 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
854 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
855 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
857 break;
858 case 1:
859 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
860 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
861 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
862 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
863 break;
864 case 2:
865 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
866 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
867 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
868 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
869 break;
870 case 3:
871 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
872 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
873 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
874 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
875 break;
876 case 4:
877 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
878 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
879 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
880 TILE_SPLIT(split_equal_to_row_size));
881 break;
882 case 5:
883 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
884 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
885 break;
886 case 6:
887 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
888 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
889 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
890 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
891 break;
892 case 7:
893 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
894 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
895 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
896 TILE_SPLIT(split_equal_to_row_size));
897 break;
898 case 8:
899 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
900 PIPE_CONFIG(ADDR_SURF_P4_8x16));
901 break;
902 case 9:
903 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
904 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
905 break;
906 case 10:
907 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
908 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
909 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
911 break;
912 case 11:
913 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
914 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
915 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
917 break;
918 case 12:
919 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
920 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
921 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
923 break;
924 case 13:
925 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
926 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
927 break;
928 case 14:
929 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
930 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
931 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
932 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
933 break;
934 case 16:
935 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
936 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
937 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
939 break;
940 case 17:
941 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
942 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
943 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
945 break;
946 case 27:
947 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
948 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
949 break;
950 case 28:
951 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
952 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
953 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
955 break;
956 case 29:
957 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
958 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
959 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
960 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
961 break;
962 case 30:
963 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
964 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
965 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
967 break;
968 default:
969 gb_tile_moden = 0;
970 break;
971 }
972 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
973 }
974 }
975 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
976 switch (reg_offset) {
977 case 0:
978 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
981 NUM_BANKS(ADDR_SURF_16_BANK));
982 break;
983 case 1:
984 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
987 NUM_BANKS(ADDR_SURF_16_BANK));
988 break;
989 case 2:
990 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
991 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
992 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
993 NUM_BANKS(ADDR_SURF_16_BANK));
994 break;
995 case 3:
996 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
997 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
998 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
999 NUM_BANKS(ADDR_SURF_16_BANK));
1000 break;
1001 case 4:
1002 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1003 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1004 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1005 NUM_BANKS(ADDR_SURF_16_BANK));
1006 break;
1007 case 5:
1008 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1011 NUM_BANKS(ADDR_SURF_8_BANK));
1012 break;
1013 case 6:
1014 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1015 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1016 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1017 NUM_BANKS(ADDR_SURF_4_BANK));
1018 break;
1019 case 8:
1020 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1021 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1022 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1023 NUM_BANKS(ADDR_SURF_16_BANK));
1024 break;
1025 case 9:
1026 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1027 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1028 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1029 NUM_BANKS(ADDR_SURF_16_BANK));
1030 break;
1031 case 10:
1032 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1033 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1034 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1035 NUM_BANKS(ADDR_SURF_16_BANK));
1036 break;
1037 case 11:
1038 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1041 NUM_BANKS(ADDR_SURF_16_BANK));
1042 break;
1043 case 12:
1044 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1047 NUM_BANKS(ADDR_SURF_16_BANK));
1048 break;
1049 case 13:
1050 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1051 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1052 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1053 NUM_BANKS(ADDR_SURF_8_BANK));
1054 break;
1055 case 14:
1056 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1059 NUM_BANKS(ADDR_SURF_4_BANK));
1060 break;
1061 default:
1062 gb_tile_moden = 0;
1063 break;
1064 }
1065 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1066 }
1067 } else if (num_pipe_configs == 2) {
1068 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1069 switch (reg_offset) {
1070 case 0:
1071 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1072 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1073 PIPE_CONFIG(ADDR_SURF_P2) |
1074 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1075 break;
1076 case 1:
1077 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1078 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1079 PIPE_CONFIG(ADDR_SURF_P2) |
1080 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1081 break;
1082 case 2:
1083 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1084 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1085 PIPE_CONFIG(ADDR_SURF_P2) |
1086 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1087 break;
1088 case 3:
1089 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1090 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1091 PIPE_CONFIG(ADDR_SURF_P2) |
1092 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1093 break;
1094 case 4:
1095 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1097 PIPE_CONFIG(ADDR_SURF_P2) |
1098 TILE_SPLIT(split_equal_to_row_size));
1099 break;
1100 case 5:
1101 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1102 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1103 break;
1104 case 6:
1105 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1106 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1107 PIPE_CONFIG(ADDR_SURF_P2) |
1108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1109 break;
1110 case 7:
1111 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1112 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1113 PIPE_CONFIG(ADDR_SURF_P2) |
1114 TILE_SPLIT(split_equal_to_row_size));
1115 break;
1116 case 8:
1117 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1118 break;
1119 case 9:
1120 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1121 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1122 break;
1123 case 10:
1124 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1125 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1126 PIPE_CONFIG(ADDR_SURF_P2) |
1127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1128 break;
1129 case 11:
1130 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1131 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1132 PIPE_CONFIG(ADDR_SURF_P2) |
1133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1134 break;
1135 case 12:
1136 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1137 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1138 PIPE_CONFIG(ADDR_SURF_P2) |
1139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1140 break;
1141 case 13:
1142 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1143 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1144 break;
1145 case 14:
1146 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1147 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1148 PIPE_CONFIG(ADDR_SURF_P2) |
1149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1150 break;
1151 case 16:
1152 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1153 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1154 PIPE_CONFIG(ADDR_SURF_P2) |
1155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1156 break;
1157 case 17:
1158 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1160 PIPE_CONFIG(ADDR_SURF_P2) |
1161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1162 break;
1163 case 27:
1164 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1165 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1166 break;
1167 case 28:
1168 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1169 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1170 PIPE_CONFIG(ADDR_SURF_P2) |
1171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1172 break;
1173 case 29:
1174 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1175 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1176 PIPE_CONFIG(ADDR_SURF_P2) |
1177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1178 break;
1179 case 30:
1180 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1181 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1182 PIPE_CONFIG(ADDR_SURF_P2) |
1183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1184 break;
1185 default:
1186 gb_tile_moden = 0;
1187 break;
1188 }
1189 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1190 }
1191 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1192 switch (reg_offset) {
1193 case 0:
1194 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1197 NUM_BANKS(ADDR_SURF_16_BANK));
1198 break;
1199 case 1:
1200 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1203 NUM_BANKS(ADDR_SURF_16_BANK));
1204 break;
1205 case 2:
1206 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1209 NUM_BANKS(ADDR_SURF_16_BANK));
1210 break;
1211 case 3:
1212 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1215 NUM_BANKS(ADDR_SURF_16_BANK));
1216 break;
1217 case 4:
1218 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1221 NUM_BANKS(ADDR_SURF_16_BANK));
1222 break;
1223 case 5:
1224 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1227 NUM_BANKS(ADDR_SURF_16_BANK));
1228 break;
1229 case 6:
1230 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1233 NUM_BANKS(ADDR_SURF_8_BANK));
1234 break;
1235 case 8:
1236 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1239 NUM_BANKS(ADDR_SURF_16_BANK));
1240 break;
1241 case 9:
1242 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1245 NUM_BANKS(ADDR_SURF_16_BANK));
1246 break;
1247 case 10:
1248 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1251 NUM_BANKS(ADDR_SURF_16_BANK));
1252 break;
1253 case 11:
1254 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1257 NUM_BANKS(ADDR_SURF_16_BANK));
1258 break;
1259 case 12:
1260 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1263 NUM_BANKS(ADDR_SURF_16_BANK));
1264 break;
1265 case 13:
1266 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1269 NUM_BANKS(ADDR_SURF_16_BANK));
1270 break;
1271 case 14:
1272 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1275 NUM_BANKS(ADDR_SURF_8_BANK));
1276 break;
1277 default:
1278 gb_tile_moden = 0;
1279 break;
1280 }
1281 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1282 }
1283 } else
1284 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1285}
1286
1287/**
1288 * cik_select_se_sh - select which SE, SH to address
1289 *
1290 * @rdev: radeon_device pointer
1291 * @se_num: shader engine to address
1292 * @sh_num: sh block to address
1293 *
1294 * Select which SE, SH combinations to address. Certain
1295 * registers are instanced per SE or SH. 0xffffffff means
1296 * broadcast to all SEs or SHs (CIK).
1297 */
1298static void cik_select_se_sh(struct radeon_device *rdev,
1299 u32 se_num, u32 sh_num)
1300{
1301 u32 data = INSTANCE_BROADCAST_WRITES;
1302
1303 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1304 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1305 else if (se_num == 0xffffffff)
1306 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1307 else if (sh_num == 0xffffffff)
1308 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1309 else
1310 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1311 WREG32(GRBM_GFX_INDEX, data);
1312}
1313
1314/**
1315 * cik_create_bitmask - create a bitmask
1316 *
1317 * @bit_width: length of the mask
1318 *
1319 * create a variable length bit mask (CIK).
1320 * Returns the bitmask.
1321 */
1322static u32 cik_create_bitmask(u32 bit_width)
1323{
1324 u32 i, mask = 0;
1325
1326 for (i = 0; i < bit_width; i++) {
1327 mask <<= 1;
1328 mask |= 1;
1329 }
1330 return mask;
1331}
1332
1333/**
1334 * cik_get_rb_disabled - get the disabled render backend (RB) bitmask
1335 *
1336 * @rdev: radeon_device pointer
1337 * @max_rb_num: max RBs (render backends) for the asic
1338 * @se_num: number of SEs (shader engines) for the asic
1339 * @sh_per_se: number of SH blocks per SE for the asic
1340 *
1341 * Calculates the bitmask of disabled RBs (CIK).
1342 * Returns the disabled RB bitmask.
1343 */
1344static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1345 u32 max_rb_num, u32 se_num,
1346 u32 sh_per_se)
1347{
1348 u32 data, mask;
1349
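/* merge the hardware (CC) and user (GC_USER) backend disable masks */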
1350 data = RREG32(CC_RB_BACKEND_DISABLE);
1351 if (data & 1)
1352 data &= BACKEND_DISABLE_MASK;
1353 else
1354 data = 0;
1355 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1356
1357 data >>= BACKEND_DISABLE_SHIFT;
1358
1359 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1360
1361 return data & mask;
1362}
1363
1364/**
1365 * cik_setup_rb - setup the RBs on the asic
1366 *
1367 * @rdev: radeon_device pointer
1368 * @se_num: number of SEs (shader engines) for the asic
1369 * @sh_per_se: number of SH blocks per SE for the asic
1370 * @max_rb_num: max RBs (render backends) for the asic
1371 *
1372 * Configures per-SE/SH RB registers (CIK).
1373 */
1374static void cik_setup_rb(struct radeon_device *rdev,
1375 u32 se_num, u32 sh_per_se,
1376 u32 max_rb_num)
1377{
1378 int i, j;
1379 u32 data, mask;
1380 u32 disabled_rbs = 0;
1381 u32 enabled_rbs = 0;
1382
1383 for (i = 0; i < se_num; i++) {
1384 for (j = 0; j < sh_per_se; j++) {
1385 cik_select_se_sh(rdev, i, j);
1386 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1387 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1388 }
1389 }
1390 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1391
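/* invert the disabled bitmap to get the set of enabled RBs */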
1392 mask = 1;
1393 for (i = 0; i < max_rb_num; i++) {
1394 if (!(disabled_rbs & mask))
1395 enabled_rbs |= mask;
1396 mask <<= 1;
1397 }
1398
1399 for (i = 0; i < se_num; i++) {
1400 cik_select_se_sh(rdev, i, 0xffffffff);
1401 data = 0;
1402 for (j = 0; j < sh_per_se; j++) {
1403 switch (enabled_rbs & 3) {
1404 case 1:
1405 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1406 break;
1407 case 2:
1408 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1409 break;
1410 case 3:
1411 default:
1412 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1413 break;
1414 }
1415 enabled_rbs >>= 2;
1416 }
1417 WREG32(PA_SC_RASTER_CONFIG, data);
1418 }
1419 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1420}
1421
1422/**
1423 * cik_gpu_init - setup the 3D engine
1424 *
1425 * @rdev: radeon_device pointer
1426 *
1427 * Configures the 3D engine and tiling configuration
1428 * registers so that the 3D engine is usable.
1429 */
1430static void cik_gpu_init(struct radeon_device *rdev)
1431{
1432 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1433 u32 mc_shared_chmap, mc_arb_ramcfg;
1434 u32 hdp_host_path_cntl;
1435 u32 tmp;
1436 int i, j;
1437
1438 switch (rdev->family) {
1439 case CHIP_BONAIRE:
1440 rdev->config.cik.max_shader_engines = 2;
1441 rdev->config.cik.max_tile_pipes = 4;
1442 rdev->config.cik.max_cu_per_sh = 7;
1443 rdev->config.cik.max_sh_per_se = 1;
1444 rdev->config.cik.max_backends_per_se = 2;
1445 rdev->config.cik.max_texture_channel_caches = 4;
1446 rdev->config.cik.max_gprs = 256;
1447 rdev->config.cik.max_gs_threads = 32;
1448 rdev->config.cik.max_hw_contexts = 8;
1449
1450 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1451 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1452 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1453 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1454 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1455 break;
1456 case CHIP_KAVERI:
1457 /* TODO */
1458 break;
1459 case CHIP_KABINI:
1460 default:
1461 rdev->config.cik.max_shader_engines = 1;
1462 rdev->config.cik.max_tile_pipes = 2;
1463 rdev->config.cik.max_cu_per_sh = 2;
1464 rdev->config.cik.max_sh_per_se = 1;
1465 rdev->config.cik.max_backends_per_se = 1;
1466 rdev->config.cik.max_texture_channel_caches = 2;
1467 rdev->config.cik.max_gprs = 256;
1468 rdev->config.cik.max_gs_threads = 16;
1469 rdev->config.cik.max_hw_contexts = 8;
1470
1471 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1472 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1473 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1474 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1475 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1476 break;
1477 }
1478
1479 /* Initialize HDP */
1480 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1481 WREG32((0x2c14 + j), 0x00000000);
1482 WREG32((0x2c18 + j), 0x00000000);
1483 WREG32((0x2c1c + j), 0x00000000);
1484 WREG32((0x2c20 + j), 0x00000000);
1485 WREG32((0x2c24 + j), 0x00000000);
1486 }
1487
1488 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1489
1490 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1491
1492 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1493 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1494
1495 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1496 rdev->config.cik.mem_max_burst_length_bytes = 256;
1497 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
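/* row size in KB: (256 << NOOFCOLS) columns at (presumably) 4 bytes each, divided by 1024 */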
1498 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1499 if (rdev->config.cik.mem_row_size_in_kb > 4)
1500 rdev->config.cik.mem_row_size_in_kb = 4;
1501 /* XXX use MC settings? */
1502 rdev->config.cik.shader_engine_tile_size = 32;
1503 rdev->config.cik.num_gpus = 1;
1504 rdev->config.cik.multi_gpu_tile_size = 64;
1505
1506 /* fix up row size */
1507 gb_addr_config &= ~ROW_SIZE_MASK;
1508 switch (rdev->config.cik.mem_row_size_in_kb) {
1509 case 1:
1510 default:
1511 gb_addr_config |= ROW_SIZE(0);
1512 break;
1513 case 2:
1514 gb_addr_config |= ROW_SIZE(1);
1515 break;
1516 case 4:
1517 gb_addr_config |= ROW_SIZE(2);
1518 break;
1519 }
1520
1521 /* setup tiling info dword. gb_addr_config is not adequate since it does
1522 * not have bank info, so create a custom tiling dword.
1523 * bits 3:0 num_pipes
1524 * bits 7:4 num_banks
1525 * bits 11:8 group_size
1526 * bits 15:12 row_size
1527 */
1528 rdev->config.cik.tile_config = 0;
1529 switch (rdev->config.cik.num_tile_pipes) {
1530 case 1:
1531 rdev->config.cik.tile_config |= (0 << 0);
1532 break;
1533 case 2:
1534 rdev->config.cik.tile_config |= (1 << 0);
1535 break;
1536 case 4:
1537 rdev->config.cik.tile_config |= (2 << 0);
1538 break;
1539 case 8:
1540 default:
1541 /* XXX what about 12? */
1542 rdev->config.cik.tile_config |= (3 << 0);
1543 break;
1544 }
1545 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1546 rdev->config.cik.tile_config |= 1 << 4;
1547 else
1548 rdev->config.cik.tile_config |= 0 << 4;
1549 rdev->config.cik.tile_config |=
1550 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1551 rdev->config.cik.tile_config |=
1552 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1553
1554 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1555 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1556 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1557 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1558 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1559 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1560 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1561 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1562
1563 cik_tiling_mode_table_init(rdev);
1564
1565 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1566 rdev->config.cik.max_sh_per_se,
1567 rdev->config.cik.max_backends_per_se);
1568
1569 /* set HW defaults for 3D engine */
1570 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1571
1572 WREG32(SX_DEBUG_1, 0x20);
1573
1574 WREG32(TA_CNTL_AUX, 0x00010000);
1575
1576 tmp = RREG32(SPI_CONFIG_CNTL);
1577 tmp |= 0x03000000;
1578 WREG32(SPI_CONFIG_CNTL, tmp);
1579
1580 WREG32(SQ_CONFIG, 1);
1581
1582 WREG32(DB_DEBUG, 0);
1583
1584 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1585 tmp |= 0x00000400;
1586 WREG32(DB_DEBUG2, tmp);
1587
1588 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1589 tmp |= 0x00020200;
1590 WREG32(DB_DEBUG3, tmp);
1591
1592 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1593 tmp |= 0x00018208;
1594 WREG32(CB_HW_CONTROL, tmp);
1595
1596 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1597
1598 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1599 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1600 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1601 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1602
1603 WREG32(VGT_NUM_INSTANCES, 1);
1604
1605 WREG32(CP_PERFMON_CNTL, 0);
1606
1607 WREG32(SQ_CONFIG, 0);
1608
1609 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1610 FORCE_EOV_MAX_REZ_CNT(255)));
1611
1612 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1613 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1614
1615 WREG32(VGT_GS_VERTEX_REUSE, 16);
1616 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1617
1618 tmp = RREG32(HDP_MISC_CNTL);
1619 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1620 WREG32(HDP_MISC_CNTL, tmp);
1621
1622 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1623 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1624
1625 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1626 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1627
1628 udelay(50);
1629}
1630
1631/*
1632 * GPU scratch register helper functions.
1633 */
1634/**
1635 * cik_scratch_init - setup driver info for CP scratch regs
1636 *
1637 * @rdev: radeon_device pointer
1638 *
1639 * Set up the number and offset of the CP scratch registers.
1640 * NOTE: use of CP scratch registers is a legacy interface and
1641 * is not used by default on newer asics (r6xx+). On newer asics,
1642 * memory buffers are used for fences rather than scratch regs.
1643 */
1644static void cik_scratch_init(struct radeon_device *rdev)
1645{
1646 int i;
1647
1648 rdev->scratch.num_reg = 7;
1649 rdev->scratch.reg_base = SCRATCH_REG0;
1650 for (i = 0; i < rdev->scratch.num_reg; i++) {
1651 rdev->scratch.free[i] = true;
1652 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1653 }
1654}
1655
1656/**
1657 * cik_ring_test - basic gfx ring test
1658 *
1659 * @rdev: radeon_device pointer
1660 * @ring: radeon_ring structure holding ring information
1661 *
1662 * Allocate a scratch register and write to it using the gfx ring (CIK).
1663 * Provides a basic gfx ring test to verify that the ring is working.
1664 * Used by cik_cp_gfx_resume();
1665 * Returns 0 on success, error on failure.
1666 */
1667int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1668{
1669 uint32_t scratch;
1670 uint32_t tmp = 0;
1671 unsigned i;
1672 int r;
1673
1674 r = radeon_scratch_get(rdev, &scratch);
1675 if (r) {
1676 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1677 return r;
1678 }
1679 WREG32(scratch, 0xCAFEDEAD);
1680 r = radeon_ring_lock(rdev, ring, 3);
1681 if (r) {
1682 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1683 radeon_scratch_free(rdev, scratch);
1684 return r;
1685 }
1686 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1687 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1688 radeon_ring_write(ring, 0xDEADBEEF);
1689 radeon_ring_unlock_commit(rdev, ring);
1690 for (i = 0; i < rdev->usec_timeout; i++) {
1691 tmp = RREG32(scratch);
1692 if (tmp == 0xDEADBEEF)
1693 break;
1694 DRM_UDELAY(1);
1695 }
1696 if (i < rdev->usec_timeout) {
1697 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1698 } else {
1699 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1700 ring->idx, scratch, tmp);
1701 r = -EINVAL;
1702 }
1703 radeon_scratch_free(rdev, scratch);
1704 return r;
1705}
1706
1707/**
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001708 * cik_fence_ring_emit - emit a fence on the gfx ring
1709 *
1710 * @rdev: radeon_device pointer
1711 * @fence: radeon fence object
1712 *
1713 * Emits a fence sequence number on the gfx ring and flushes
1714 * GPU caches.
1715 */
1716void cik_fence_ring_emit(struct radeon_device *rdev,
1717 struct radeon_fence *fence)
1718{
1719 struct radeon_ring *ring = &rdev->ring[fence->ring];
1720 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1721
1722 /* EVENT_WRITE_EOP - flush caches, send int */
1723 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1724 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1725 EOP_TC_ACTION_EN |
1726 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1727 EVENT_INDEX(5)));
1728 radeon_ring_write(ring, addr & 0xfffffffc);
1729 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1730 radeon_ring_write(ring, fence->seq);
1731 radeon_ring_write(ring, 0);
1732 /* HDP flush */
1733 /* We should be using the new WAIT_REG_MEM special op packet here
1734 * but it causes the CP to hang
1735 */
1736 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1737 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1738 WRITE_DATA_DST_SEL(0)));
1739 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1740 radeon_ring_write(ring, 0);
1741 radeon_ring_write(ring, 0);
1742}
1743
1744void cik_semaphore_ring_emit(struct radeon_device *rdev,
1745 struct radeon_ring *ring,
1746 struct radeon_semaphore *semaphore,
1747 bool emit_wait)
1748{
1749 uint64_t addr = semaphore->gpu_addr;
1750 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1751
1752 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1753 radeon_ring_write(ring, addr & 0xffffffff);
1754 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1755}
1756
1757/*
1758 * IB stuff
1759 */
1760/**
1761 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1762 *
1763 * @rdev: radeon_device pointer
1764 * @ib: radeon indirect buffer object
1765 *
1766 * Emits a DE (drawing engine) or CE (constant engine) IB
1767 * on the gfx ring. IBs are usually generated by userspace
1768 * acceleration drivers and submitted to the kernel for
1769 * scheduling on the ring. This function schedules the IB
1770 * on the gfx ring for execution by the GPU.
1771 */
1772void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1773{
1774 struct radeon_ring *ring = &rdev->ring[ib->ring];
1775 u32 header, control = INDIRECT_BUFFER_VALID;
1776
1777 if (ib->is_const_ib) {
1778 /* set switch buffer packet before const IB */
1779 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1780 radeon_ring_write(ring, 0);
1781
1782 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1783 } else {
1784 u32 next_rptr;
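		/* next_rptr is the CP read pointer after this submission is
		 * consumed: the rptr update packet emitted here (3 dwords via
		 * SET_UCONFIG_REG or 5 dwords via WRITE_DATA) plus the
		 * 4 dword INDIRECT_BUFFER packet emitted below.
		 */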
1785 if (ring->rptr_save_reg) {
1786 next_rptr = ring->wptr + 3 + 4;
1787 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1788 radeon_ring_write(ring, ((ring->rptr_save_reg -
1789 PACKET3_SET_UCONFIG_REG_START) >> 2));
1790 radeon_ring_write(ring, next_rptr);
1791 } else if (rdev->wb.enabled) {
1792 next_rptr = ring->wptr + 5 + 4;
1793 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1794 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1795 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1796 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1797 radeon_ring_write(ring, next_rptr);
1798 }
1799
1800 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1801 }
1802
1803 control |= ib->length_dw |
1804 (ib->vm ? (ib->vm->id << 24) : 0);
1805
1806 radeon_ring_write(ring, header);
1807 radeon_ring_write(ring,
1808#ifdef __BIG_ENDIAN
1809 (2 << 0) |
1810#endif
1811 (ib->gpu_addr & 0xFFFFFFFC));
1812 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1813 radeon_ring_write(ring, control);
1814}
1815
Alex Deucherfbc832c2012-07-20 14:41:35 -04001816/**
1817 * cik_ib_test - basic gfx ring IB test
1818 *
1819 * @rdev: radeon_device pointer
1820 * @ring: radeon_ring structure holding ring information
1821 *
1822 * Allocate an IB and execute it on the gfx ring (CIK).
1823 * Provides a basic gfx ring test to verify that IBs are working.
1824 * Returns 0 on success, error on failure.
1825 */
1826int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1827{
1828 struct radeon_ib ib;
1829 uint32_t scratch;
1830 uint32_t tmp = 0;
1831 unsigned i;
1832 int r;
1833
1834 r = radeon_scratch_get(rdev, &scratch);
1835 if (r) {
1836 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1837 return r;
1838 }
1839 WREG32(scratch, 0xCAFEDEAD);
1840 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1841 if (r) {
1842 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
 radeon_scratch_free(rdev, scratch);
1843 return r;
1844 }
1845 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1846 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1847 ib.ptr[2] = 0xDEADBEEF;
1848 ib.length_dw = 3;
1849 r = radeon_ib_schedule(rdev, &ib, NULL);
1850 if (r) {
1851 radeon_scratch_free(rdev, scratch);
1852 radeon_ib_free(rdev, &ib);
1853 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1854 return r;
1855 }
1856 r = radeon_fence_wait(ib.fence, false);
1857 if (r) {
1858 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 radeon_scratch_free(rdev, scratch);
 radeon_ib_free(rdev, &ib);
1859 return r;
1860 }
1861 for (i = 0; i < rdev->usec_timeout; i++) {
1862 tmp = RREG32(scratch);
1863 if (tmp == 0xDEADBEEF)
1864 break;
1865 DRM_UDELAY(1);
1866 }
1867 if (i < rdev->usec_timeout) {
1868 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1869 } else {
1870 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1871 scratch, tmp);
1872 r = -EINVAL;
1873 }
1874 radeon_scratch_free(rdev, scratch);
1875 radeon_ib_free(rdev, &ib);
1876 return r;
1877}
1878
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001879/*
Alex Deucher841cf442012-12-18 21:47:44 -05001880 * CP.
1881 * On CIK, gfx and compute now have independent command processors.
1882 *
1883 * GFX
1884 * Gfx consists of a single ring and can process both gfx jobs and
1885 * compute jobs. The gfx CP consists of three microengines (ME):
1886 * PFP - Pre-Fetch Parser
1887 * ME - Micro Engine
1888 * CE - Constant Engine
1889 * The PFP and ME make up what is considered the Drawing Engine (DE).
1890 * The CE is an asynchronous engine used for updating buffer descriptors
1891 * used by the DE so that they can be loaded into cache in parallel
1892 * while the DE is processing state update packets.
1893 *
1894 * Compute
1895 * The compute CP consists of two microengines (ME):
1896 * MEC1 - Compute MicroEngine 1
1897 * MEC2 - Compute MicroEngine 2
1898 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1899 * The queues are exposed to userspace and are programmed directly
1900 * by the compute runtime.
1901 */
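/*
 * Illustrative sketch, not part of the driver: the compute queue
 * topology described above expressed as arithmetic.  The helper name
 * is local to this example and it assumes both MECs are present
 * (MEC2 ucode is only loaded on Kaveri, see
 * cik_cp_compute_load_microcode() below).
 */
static inline u32 cik_example_num_compute_queues(void)
{
	const u32 num_mec = 2;		/* MEC1 and MEC2 */
	const u32 pipes_per_mec = 4;	/* 4 compute pipes per MEC */
	const u32 queues_per_pipe = 8;	/* 8 queues per pipe */

	/* 2 * 4 * 8 = 64 queues exposed to the compute runtime */
	return num_mec * pipes_per_mec * queues_per_pipe;
}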
1902/**
1903 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1904 *
1905 * @rdev: radeon_device pointer
1906 * @enable: enable or disable the MEs
1907 *
1908 * Halts or unhalts the gfx MEs.
1909 */
1910static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1911{
1912 if (enable)
1913 WREG32(CP_ME_CNTL, 0);
1914 else {
1915 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1916 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1917 }
1918 udelay(50);
1919}
1920
1921/**
1922 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1923 *
1924 * @rdev: radeon_device pointer
1925 *
1926 * Loads the gfx PFP, ME, and CE ucode.
1927 * Returns 0 for success, -EINVAL if the ucode is not available.
1928 */
1929static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1930{
1931 const __be32 *fw_data;
1932 int i;
1933
1934 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1935 return -EINVAL;
1936
1937 cik_cp_gfx_enable(rdev, false);
1938
1939 /* PFP */
1940 fw_data = (const __be32 *)rdev->pfp_fw->data;
1941 WREG32(CP_PFP_UCODE_ADDR, 0);
1942 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1943 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1944 WREG32(CP_PFP_UCODE_ADDR, 0);
1945
1946 /* CE */
1947 fw_data = (const __be32 *)rdev->ce_fw->data;
1948 WREG32(CP_CE_UCODE_ADDR, 0);
1949 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1950 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1951 WREG32(CP_CE_UCODE_ADDR, 0);
1952
1953 /* ME */
1954 fw_data = (const __be32 *)rdev->me_fw->data;
1955 WREG32(CP_ME_RAM_WADDR, 0);
1956 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1957 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1958 WREG32(CP_ME_RAM_WADDR, 0);
1959
1960 WREG32(CP_PFP_UCODE_ADDR, 0);
1961 WREG32(CP_CE_UCODE_ADDR, 0);
1962 WREG32(CP_ME_RAM_WADDR, 0);
1963 WREG32(CP_ME_RAM_RADDR, 0);
1964 return 0;
1965}
1966
1967/**
1968 * cik_cp_gfx_start - start the gfx ring
1969 *
1970 * @rdev: radeon_device pointer
1971 *
1972 * Enables the ring and loads the clear state context and other
1973 * packets required to init the ring.
1974 * Returns 0 for success, error for failure.
1975 */
1976static int cik_cp_gfx_start(struct radeon_device *rdev)
1977{
1978 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1979 int r, i;
1980
1981 /* init the CP */
1982 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1983 WREG32(CP_ENDIAN_SWAP, 0);
1984 WREG32(CP_DEVICE_ID, 1);
1985
1986 cik_cp_gfx_enable(rdev, true);
1987
1988 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1989 if (r) {
1990 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1991 return r;
1992 }
1993
1994 /* init the CE partitions. CE only used for gfx on CIK */
1995 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1996 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1997 radeon_ring_write(ring, 0xc000);
1998 radeon_ring_write(ring, 0xc000);
1999
2000 /* setup clear context state */
2001 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2002 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2003
2004 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2005 radeon_ring_write(ring, 0x80000000);
2006 radeon_ring_write(ring, 0x80000000);
2007
2008 for (i = 0; i < cik_default_size; i++)
2009 radeon_ring_write(ring, cik_default_state[i]);
2010
2011 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2012 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2013
2014 /* set clear context state */
2015 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2016 radeon_ring_write(ring, 0);
2017
2018 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2019 radeon_ring_write(ring, 0x00000316);
2020 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2021 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2022
2023 radeon_ring_unlock_commit(rdev, ring);
2024
2025 return 0;
2026}
2027
2028/**
2029 * cik_cp_gfx_fini - stop the gfx ring
2030 *
2031 * @rdev: radeon_device pointer
2032 *
2033 * Stop the gfx ring and tear down the driver ring
2034 * info.
2035 */
2036static void cik_cp_gfx_fini(struct radeon_device *rdev)
2037{
2038 cik_cp_gfx_enable(rdev, false);
2039 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2040}
2041
2042/**
2043 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2044 *
2045 * @rdev: radeon_device pointer
2046 *
2047 * Program the location and size of the gfx ring buffer
2048 * and test it to make sure it's working.
2049 * Returns 0 for success, error for failure.
2050 */
2051static int cik_cp_gfx_resume(struct radeon_device *rdev)
2052{
2053 struct radeon_ring *ring;
2054 u32 tmp;
2055 u32 rb_bufsz;
2056 u64 rb_addr;
2057 int r;
2058
2059 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2060 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2061
2062 /* Set the write pointer delay */
2063 WREG32(CP_RB_WPTR_DELAY, 0);
2064
2065 /* set the RB to use vmid 0 */
2066 WREG32(CP_RB_VMID, 0);
2067
2068 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2069
2070 /* ring 0 - compute and gfx */
2071 /* Set ring buffer size */
2072 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2073 rb_bufsz = drm_order(ring->ring_size / 8);
2074 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2075#ifdef __BIG_ENDIAN
2076 tmp |= BUF_SWAP_32BIT;
2077#endif
2078 WREG32(CP_RB0_CNTL, tmp);
2079
2080 /* Initialize the ring buffer's read and write pointers */
2081 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2082 ring->wptr = 0;
2083 WREG32(CP_RB0_WPTR, ring->wptr);
2084
2085 /* set the wb address whether it's enabled or not */
2086 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2087 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2088
2089 /* scratch register shadowing is no longer supported */
2090 WREG32(SCRATCH_UMSK, 0);
2091
2092 if (!rdev->wb.enabled)
2093 tmp |= RB_NO_UPDATE;
2094
2095 mdelay(1);
2096 WREG32(CP_RB0_CNTL, tmp);
2097
2098 rb_addr = ring->gpu_addr >> 8;
2099 WREG32(CP_RB0_BASE, rb_addr);
2100 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2101
2102 ring->rptr = RREG32(CP_RB0_RPTR);
2103
2104 /* start the ring */
2105 cik_cp_gfx_start(rdev);
2106 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2107 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2108 if (r) {
2109 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2110 return r;
2111 }
2112 return 0;
2113}
2114
2115/**
2116 * cik_cp_compute_enable - enable/disable the compute CP MEs
2117 *
2118 * @rdev: radeon_device pointer
2119 * @enable: enable or disable the MEs
2120 *
2121 * Halts or unhalts the compute MEs.
2122 */
2123static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2124{
2125 if (enable)
2126 WREG32(CP_MEC_CNTL, 0);
2127 else
2128 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2129 udelay(50);
2130}
2131
2132/**
2133 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2134 *
2135 * @rdev: radeon_device pointer
2136 *
2137 * Loads the compute MEC1&2 ucode.
2138 * Returns 0 for success, -EINVAL if the ucode is not available.
2139 */
2140static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2141{
2142 const __be32 *fw_data;
2143 int i;
2144
2145 if (!rdev->mec_fw)
2146 return -EINVAL;
2147
2148 cik_cp_compute_enable(rdev, false);
2149
2150 /* MEC1 */
2151 fw_data = (const __be32 *)rdev->mec_fw->data;
2152 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2153 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2154 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2155 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2156
2157 if (rdev->family == CHIP_KAVERI) {
2158 /* MEC2 */
2159 fw_data = (const __be32 *)rdev->mec_fw->data;
2160 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2161 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2162 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2163 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2164 }
2165
2166 return 0;
2167}
2168
2169/**
2170 * cik_cp_compute_start - start the compute queues
2171 *
2172 * @rdev: radeon_device pointer
2173 *
2174 * Enable the compute queues.
2175 * Returns 0 for success, error for failure.
2176 */
2177static int cik_cp_compute_start(struct radeon_device *rdev)
2178{
2179 /* todo */
2180 return 0;
2181}
2182
2183/**
2184 * cik_cp_compute_fini - stop the compute queues
2185 *
2186 * @rdev: radeon_device pointer
2187 *
2188 * Stop the compute queues and tear down the driver queue
2189 * info.
2190 */
2191static void cik_cp_compute_fini(struct radeon_device *rdev)
2192{
2193 cik_cp_compute_enable(rdev, false);
2194 /* todo */
2195}
2196
2197/**
2198 * cik_cp_compute_resume - setup the compute queue registers
2199 *
2200 * @rdev: radeon_device pointer
2201 *
2202 * Program the compute queues and test them to make sure they
2203 * are working.
2204 * Returns 0 for success, error for failure.
2205 */
2206static int cik_cp_compute_resume(struct radeon_device *rdev)
2207{
2208 int r;
2209
2210 /* todo */
2211 r = cik_cp_compute_start(rdev);
2212 if (r)
2213 return r;
2214 return 0;
2215}
2216
2217/* XXX temporary wrappers to handle both compute and gfx */
2218/* XXX */
2219static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2220{
2221 cik_cp_gfx_enable(rdev, enable);
2222 cik_cp_compute_enable(rdev, enable);
2223}
2224
2225/* XXX */
2226static int cik_cp_load_microcode(struct radeon_device *rdev)
2227{
2228 int r;
2229
2230 r = cik_cp_gfx_load_microcode(rdev);
2231 if (r)
2232 return r;
2233 r = cik_cp_compute_load_microcode(rdev);
2234 if (r)
2235 return r;
2236
2237 return 0;
2238}
2239
2240/* XXX */
2241static void cik_cp_fini(struct radeon_device *rdev)
2242{
2243 cik_cp_gfx_fini(rdev);
2244 cik_cp_compute_fini(rdev);
2245}
2246
2247/* XXX */
2248static int cik_cp_resume(struct radeon_device *rdev)
2249{
2250 int r;
2251
2252 /* Reset all cp blocks */
2253 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2254 RREG32(GRBM_SOFT_RESET);
2255 mdelay(15);
2256 WREG32(GRBM_SOFT_RESET, 0);
2257 RREG32(GRBM_SOFT_RESET);
2258
2259 r = cik_cp_load_microcode(rdev);
2260 if (r)
2261 return r;
2262
2263 r = cik_cp_gfx_resume(rdev);
2264 if (r)
2265 return r;
2266 r = cik_cp_compute_resume(rdev);
2267 if (r)
2268 return r;
2269
2270 return 0;
2271}
2272
Alex Deucher21a93e12013-04-09 12:47:11 -04002273/*
2274 * sDMA - System DMA
2275 * Starting with CIK, the GPU has new asynchronous
2276 * DMA engines. These engines are used for compute
2277 * and gfx. There are two DMA engines (SDMA0, SDMA1)
2278 * and each one supports 1 ring buffer used for gfx
2279 * and 2 queues used for compute.
2280 *
2281 * The programming model is very similar to the CP
2282 * (ring buffer, IBs, etc.), but sDMA has its own
2283 * packet format that is different from the PM4 format
2284 * used by the CP. sDMA supports copying data, writing
2285 * embedded data, solid fills, and a number of other
2286 * things. It also has support for tiling/detiling of
2287 * buffers.
2288 */
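/*
 * Illustrative sketch, not part of the driver: emitting a minimal sDMA
 * linear write packet (header, destination address, dword count,
 * payload), mirroring the sequence used by cik_sdma_ring_test() below.
 * The helper name is local to this example.
 */
static inline void cik_example_sdma_write_dword(struct radeon_ring *ring,
						u64 dst_addr, u32 value)
{
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
					    SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, dst_addr & 0xfffffffc); /* dst addr lo */
	radeon_ring_write(ring, upper_32_bits(dst_addr) & 0xffffffff); /* dst addr hi */
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, value);
}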
2289/**
2290 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2291 *
2292 * @rdev: radeon_device pointer
2293 * @ib: IB object to schedule
2294 *
2295 * Schedule an IB in the DMA ring (CIK).
2296 */
2297void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2298 struct radeon_ib *ib)
2299{
2300 struct radeon_ring *ring = &rdev->ring[ib->ring];
2301 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2302
2303 if (rdev->wb.enabled) {
2304 u32 next_rptr = ring->wptr + 5;
2305 while ((next_rptr & 7) != 4)
2306 next_rptr++;
2307 next_rptr += 4;
2308 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2309 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2310 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2311 radeon_ring_write(ring, 1); /* number of DWs to follow */
2312 radeon_ring_write(ring, next_rptr);
2313 }
2314
2315 /* IB packet must end on an 8 DW boundary */
2316 while ((ring->wptr & 7) != 4)
2317 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2318 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2319 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2320 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2321 radeon_ring_write(ring, ib->length_dw);
2322
2323}
2324
2325/**
2326 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2327 *
2328 * @rdev: radeon_device pointer
2329 * @fence: radeon fence object
2330 *
2331 * Add a DMA fence packet to the ring to write
2332 * the fence seq number and a DMA trap packet to generate
2333 * an interrupt if needed (CIK).
2334 */
2335void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2336 struct radeon_fence *fence)
2337{
2338 struct radeon_ring *ring = &rdev->ring[fence->ring];
2339 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2340 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2341 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2342 u32 ref_and_mask;
2343
2344 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2345 ref_and_mask = SDMA0;
2346 else
2347 ref_and_mask = SDMA1;
2348
2349 /* write the fence */
2350 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2351 radeon_ring_write(ring, addr & 0xffffffff);
2352 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2353 radeon_ring_write(ring, fence->seq);
2354 /* generate an interrupt */
2355 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2356 /* flush HDP */
2357 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2358 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2359 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2360 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2361 radeon_ring_write(ring, ref_and_mask); /* MASK */
2362 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2363}
2364
2365/**
2366 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2367 *
2368 * @rdev: radeon_device pointer
2369 * @ring: radeon_ring structure holding ring information
2370 * @semaphore: radeon semaphore object
2371 * @emit_wait: wait or signal semaphore
2372 *
2373 * Add a DMA semaphore packet to the ring to wait on or signal
2374 * other rings (CIK).
2375 */
2376void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2377 struct radeon_ring *ring,
2378 struct radeon_semaphore *semaphore,
2379 bool emit_wait)
2380{
2381 u64 addr = semaphore->gpu_addr;
2382 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2383
2384 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2385 radeon_ring_write(ring, addr & 0xfffffff8);
2386 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2387}
2388
2389/**
2390 * cik_sdma_gfx_stop - stop the gfx async dma engines
2391 *
2392 * @rdev: radeon_device pointer
2393 *
2394 * Stop the gfx async dma ring buffers (CIK).
2395 */
2396static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2397{
2398 u32 rb_cntl, reg_offset;
2399 int i;
2400
2401 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2402
2403 for (i = 0; i < 2; i++) {
2404 if (i == 0)
2405 reg_offset = SDMA0_REGISTER_OFFSET;
2406 else
2407 reg_offset = SDMA1_REGISTER_OFFSET;
2408 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2409 rb_cntl &= ~SDMA_RB_ENABLE;
2410 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2411 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2412 }
2413}
2414
2415/**
2416 * cik_sdma_rlc_stop - stop the compute async dma engines
2417 *
2418 * @rdev: radeon_device pointer
2419 *
2420 * Stop the compute async dma queues (CIK).
2421 */
2422static void cik_sdma_rlc_stop(struct radeon_device *rdev)
2423{
2424 /* XXX todo */
2425}
2426
2427/**
2428 * cik_sdma_enable - halt or unhalt the async dma engines
2429 *
2430 * @rdev: radeon_device pointer
2431 * @enable: enable/disable the DMA MEs.
2432 *
2433 * Halt or unhalt the async dma engines (CIK).
2434 */
2435static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2436{
2437 u32 me_cntl, reg_offset;
2438 int i;
2439
2440 for (i = 0; i < 2; i++) {
2441 if (i == 0)
2442 reg_offset = SDMA0_REGISTER_OFFSET;
2443 else
2444 reg_offset = SDMA1_REGISTER_OFFSET;
2445 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2446 if (enable)
2447 me_cntl &= ~SDMA_HALT;
2448 else
2449 me_cntl |= SDMA_HALT;
2450 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2451 }
2452}
2453
2454/**
2455 * cik_sdma_gfx_resume - setup and start the async dma engines
2456 *
2457 * @rdev: radeon_device pointer
2458 *
2459 * Set up the gfx DMA ring buffers and enable them (CIK).
2460 * Returns 0 for success, error for failure.
2461 */
2462static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2463{
2464 struct radeon_ring *ring;
2465 u32 rb_cntl, ib_cntl;
2466 u32 rb_bufsz;
2467 u32 reg_offset, wb_offset;
2468 int i, r;
2469
2470 for (i = 0; i < 2; i++) {
2471 if (i == 0) {
2472 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2473 reg_offset = SDMA0_REGISTER_OFFSET;
2474 wb_offset = R600_WB_DMA_RPTR_OFFSET;
2475 } else {
2476 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2477 reg_offset = SDMA1_REGISTER_OFFSET;
2478 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2479 }
2480
2481 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2482 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2483
2484 /* Set ring buffer size in dwords */
2485 rb_bufsz = drm_order(ring->ring_size / 4);
2486 rb_cntl = rb_bufsz << 1;
2487#ifdef __BIG_ENDIAN
2488 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2489#endif
2490 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2491
2492 /* Initialize the ring buffer's read and write pointers */
2493 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2494 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2495
2496 /* set the wb address whether it's enabled or not */
2497 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2498 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2499 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2500 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2501
2502 if (rdev->wb.enabled)
2503 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2504
2505 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2506 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2507
2508 ring->wptr = 0;
2509 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2510
2511 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2512
2513 /* enable DMA RB */
2514 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2515
2516 ib_cntl = SDMA_IB_ENABLE;
2517#ifdef __BIG_ENDIAN
2518 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2519#endif
2520 /* enable DMA IBs */
2521 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2522
2523 ring->ready = true;
2524
2525 r = radeon_ring_test(rdev, ring->idx, ring);
2526 if (r) {
2527 ring->ready = false;
2528 return r;
2529 }
2530 }
2531
2532 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2533
2534 return 0;
2535}
2536
2537/**
2538 * cik_sdma_rlc_resume - setup and start the async dma engines
2539 *
2540 * @rdev: radeon_device pointer
2541 *
2542 * Set up the compute DMA queues and enable them (CIK).
2543 * Returns 0 for success, error for failure.
2544 */
2545static int cik_sdma_rlc_resume(struct radeon_device *rdev)
2546{
2547 /* XXX todo */
2548 return 0;
2549}
2550
2551/**
2552 * cik_sdma_load_microcode - load the sDMA ME ucode
2553 *
2554 * @rdev: radeon_device pointer
2555 *
2556 * Loads the sDMA0/1 ucode.
2557 * Returns 0 for success, -EINVAL if the ucode is not available.
2558 */
2559static int cik_sdma_load_microcode(struct radeon_device *rdev)
2560{
2561 const __be32 *fw_data;
2562 int i;
2563
2564 if (!rdev->sdma_fw)
2565 return -EINVAL;
2566
2567 /* stop the gfx rings and rlc compute queues */
2568 cik_sdma_gfx_stop(rdev);
2569 cik_sdma_rlc_stop(rdev);
2570
2571 /* halt the MEs */
2572 cik_sdma_enable(rdev, false);
2573
2574 /* sdma0 */
2575 fw_data = (const __be32 *)rdev->sdma_fw->data;
2576 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2577 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2578 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2579 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2580
2581 /* sdma1 */
2582 fw_data = (const __be32 *)rdev->sdma_fw->data;
2583 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2584 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2585 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2586 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2587
2588 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2589 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2590 return 0;
2591}
2592
2593/**
2594 * cik_sdma_resume - setup and start the async dma engines
2595 *
2596 * @rdev: radeon_device pointer
2597 *
2598 * Set up the DMA engines and enable them (CIK).
2599 * Returns 0 for success, error for failure.
2600 */
2601static int cik_sdma_resume(struct radeon_device *rdev)
2602{
2603 int r;
2604
2605 /* Reset dma */
2606 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
2607 RREG32(SRBM_SOFT_RESET);
2608 udelay(50);
2609 WREG32(SRBM_SOFT_RESET, 0);
2610 RREG32(SRBM_SOFT_RESET);
2611
2612 r = cik_sdma_load_microcode(rdev);
2613 if (r)
2614 return r;
2615
2616 /* unhalt the MEs */
2617 cik_sdma_enable(rdev, true);
2618
2619 /* start the gfx rings and rlc compute queues */
2620 r = cik_sdma_gfx_resume(rdev);
2621 if (r)
2622 return r;
2623 r = cik_sdma_rlc_resume(rdev);
2624 if (r)
2625 return r;
2626
2627 return 0;
2628}
2629
2630/**
2631 * cik_sdma_fini - tear down the async dma engines
2632 *
2633 * @rdev: radeon_device pointer
2634 *
2635 * Stop the async dma engines and free the rings (CIK).
2636 */
2637static void cik_sdma_fini(struct radeon_device *rdev)
2638{
2639 /* stop the gfx rings and rlc compute queues */
2640 cik_sdma_gfx_stop(rdev);
2641 cik_sdma_rlc_stop(rdev);
2642 /* halt the MEs */
2643 cik_sdma_enable(rdev, false);
2644 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2645 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
2646 /* XXX - compute dma queue tear down */
2647}
2648
2649/**
2650 * cik_copy_dma - copy pages using the DMA engine
2651 *
2652 * @rdev: radeon_device pointer
2653 * @src_offset: src GPU address
2654 * @dst_offset: dst GPU address
2655 * @num_gpu_pages: number of GPU pages to xfer
2656 * @fence: radeon fence object
2657 *
2658 * Copy GPU pages using the DMA engine (CIK).
2659 * Used by the radeon ttm implementation to move pages if
2660 * registered as the asic copy callback.
2661 */
2662int cik_copy_dma(struct radeon_device *rdev,
2663 uint64_t src_offset, uint64_t dst_offset,
2664 unsigned num_gpu_pages,
2665 struct radeon_fence **fence)
2666{
2667 struct radeon_semaphore *sem = NULL;
2668 int ring_index = rdev->asic->copy.dma_ring_index;
2669 struct radeon_ring *ring = &rdev->ring[ring_index];
2670 u32 size_in_bytes, cur_size_in_bytes;
2671 int i, num_loops;
2672 int r = 0;
2673
2674 r = radeon_semaphore_create(rdev, &sem);
2675 if (r) {
2676 DRM_ERROR("radeon: moving bo (%d).\n", r);
2677 return r;
2678 }
2679
2680 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
2681 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
2682 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
2683 if (r) {
2684 DRM_ERROR("radeon: moving bo (%d).\n", r);
2685 radeon_semaphore_free(rdev, &sem, NULL);
2686 return r;
2687 }
2688
2689 if (radeon_fence_need_sync(*fence, ring->idx)) {
2690 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
2691 ring->idx);
2692 radeon_fence_note_sync(*fence, ring->idx);
2693 } else {
2694 radeon_semaphore_free(rdev, &sem, NULL);
2695 }
2696
2697 for (i = 0; i < num_loops; i++) {
2698 cur_size_in_bytes = size_in_bytes;
2699 if (cur_size_in_bytes > 0x1fffff)
2700 cur_size_in_bytes = 0x1fffff;
2701 size_in_bytes -= cur_size_in_bytes;
2702 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
2703 radeon_ring_write(ring, cur_size_in_bytes);
2704 radeon_ring_write(ring, 0); /* src/dst endian swap */
2705 radeon_ring_write(ring, src_offset & 0xffffffff);
2706 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
2707 radeon_ring_write(ring, dst_offset & 0xfffffffc);
2708 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
2709 src_offset += cur_size_in_bytes;
2710 dst_offset += cur_size_in_bytes;
2711 }
2712
2713 r = radeon_fence_emit(rdev, fence, ring->idx);
2714 if (r) {
2715 radeon_ring_unlock_undo(rdev, ring);
2716 return r;
2717 }
2718
2719 radeon_ring_unlock_commit(rdev, ring);
2720 radeon_semaphore_free(rdev, &sem, *fence);
2721
2722 return r;
2723}
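/*
 * Illustrative sketch, not part of the driver: the ring space that
 * cik_copy_dma() above reserves.  Each linear copy packet is 7 dwords
 * and moves at most 0x1fffff bytes; the extra 14 dwords cover the
 * semaphore sync and fence emission.  The helper name is local to
 * this example.
 */
static inline unsigned cik_example_copy_dma_ndw(unsigned num_gpu_pages)
{
	unsigned size_in_bytes = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;
	unsigned num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);

	return num_loops * 7 + 14;
}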
2724
2725/**
2726 * cik_sdma_ring_test - simple async dma engine test
2727 *
2728 * @rdev: radeon_device pointer
2729 * @ring: radeon_ring structure holding ring information
2730 *
2731 * Test the DMA engine by using it to write a
2732 * value to memory (CIK).
2733 * Returns 0 for success, error for failure.
2734 */
2735int cik_sdma_ring_test(struct radeon_device *rdev,
2736 struct radeon_ring *ring)
2737{
2738 unsigned i;
2739 int r;
2740 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2741 u32 tmp;
2742
2743 if (!ptr) {
2744 DRM_ERROR("invalid vram scratch pointer\n");
2745 return -EINVAL;
2746 }
2747
2748 tmp = 0xCAFEDEAD;
2749 writel(tmp, ptr);
2750
2751 r = radeon_ring_lock(rdev, ring, 4);
2752 if (r) {
2753 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2754 return r;
2755 }
2756 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2757 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2758 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
2759 radeon_ring_write(ring, 1); /* number of DWs to follow */
2760 radeon_ring_write(ring, 0xDEADBEEF);
2761 radeon_ring_unlock_commit(rdev, ring);
2762
2763 for (i = 0; i < rdev->usec_timeout; i++) {
2764 tmp = readl(ptr);
2765 if (tmp == 0xDEADBEEF)
2766 break;
2767 DRM_UDELAY(1);
2768 }
2769
2770 if (i < rdev->usec_timeout) {
2771 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2772 } else {
2773 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2774 ring->idx, tmp);
2775 r = -EINVAL;
2776 }
2777 return r;
2778}
2779
2780/**
2781 * cik_sdma_ib_test - test an IB on the DMA engine
2782 *
2783 * @rdev: radeon_device pointer
2784 * @ring: radeon_ring structure holding ring information
2785 *
2786 * Test a simple IB in the DMA ring (CIK).
2787 * Returns 0 on success, error on failure.
2788 */
2789int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2790{
2791 struct radeon_ib ib;
2792 unsigned i;
2793 int r;
2794 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2795 u32 tmp = 0;
2796
2797 if (!ptr) {
2798 DRM_ERROR("invalid vram scratch pointer\n");
2799 return -EINVAL;
2800 }
2801
2802 tmp = 0xCAFEDEAD;
2803 writel(tmp, ptr);
2804
2805 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2806 if (r) {
2807 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2808 return r;
2809 }
2810
2811 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2812 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
2813 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
2814 ib.ptr[3] = 1;
2815 ib.ptr[4] = 0xDEADBEEF;
2816 ib.length_dw = 5;
2817
2818 r = radeon_ib_schedule(rdev, &ib, NULL);
2819 if (r) {
2820 radeon_ib_free(rdev, &ib);
2821 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2822 return r;
2823 }
2824 r = radeon_fence_wait(ib.fence, false);
2825 if (r) {
2826 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 radeon_ib_free(rdev, &ib);
2827 return r;
2828 }
2829 for (i = 0; i < rdev->usec_timeout; i++) {
2830 tmp = readl(ptr);
2831 if (tmp == 0xDEADBEEF)
2832 break;
2833 DRM_UDELAY(1);
2834 }
2835 if (i < rdev->usec_timeout) {
2836 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2837 } else {
2838 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
2839 r = -EINVAL;
2840 }
2841 radeon_ib_free(rdev, &ib);
2842 return r;
2843}
2844
Alex Deuchercc066712013-04-09 12:59:51 -04002845
2846static void cik_print_gpu_status_regs(struct radeon_device *rdev)
2847{
2848 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2849 RREG32(GRBM_STATUS));
2850 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2851 RREG32(GRBM_STATUS2));
2852 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2853 RREG32(GRBM_STATUS_SE0));
2854 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2855 RREG32(GRBM_STATUS_SE1));
2856 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2857 RREG32(GRBM_STATUS_SE2));
2858 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2859 RREG32(GRBM_STATUS_SE3));
2860 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2861 RREG32(SRBM_STATUS));
2862 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2863 RREG32(SRBM_STATUS2));
2864 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
2865 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
2866 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
2867 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
2868}
2869
Alex Deucher6f2043c2013-04-09 12:43:41 -04002870/**
Alex Deuchercc066712013-04-09 12:59:51 -04002871 * cik_gpu_check_soft_reset - check which blocks are busy
2872 *
2873 * @rdev: radeon_device pointer
2874 *
2875 * Check which blocks are busy and return the relevant reset
2876 * mask to be used by cik_gpu_soft_reset().
2877 * Returns a mask of the blocks to be reset.
2878 */
2879static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
2880{
2881 u32 reset_mask = 0;
2882 u32 tmp;
2883
2884 /* GRBM_STATUS */
2885 tmp = RREG32(GRBM_STATUS);
2886 if (tmp & (PA_BUSY | SC_BUSY |
2887 BCI_BUSY | SX_BUSY |
2888 TA_BUSY | VGT_BUSY |
2889 DB_BUSY | CB_BUSY |
2890 GDS_BUSY | SPI_BUSY |
2891 IA_BUSY | IA_BUSY_NO_DMA))
2892 reset_mask |= RADEON_RESET_GFX;
2893
2894 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
2895 reset_mask |= RADEON_RESET_CP;
2896
2897 /* GRBM_STATUS2 */
2898 tmp = RREG32(GRBM_STATUS2);
2899 if (tmp & RLC_BUSY)
2900 reset_mask |= RADEON_RESET_RLC;
2901
2902 /* SDMA0_STATUS_REG */
2903 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
2904 if (!(tmp & SDMA_IDLE))
2905 reset_mask |= RADEON_RESET_DMA;
2906
2907 /* SDMA1_STATUS_REG */
2908 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
2909 if (!(tmp & SDMA_IDLE))
2910 reset_mask |= RADEON_RESET_DMA1;
2911
2912 /* SRBM_STATUS2 */
2913 tmp = RREG32(SRBM_STATUS2);
2914 if (tmp & SDMA_BUSY)
2915 reset_mask |= RADEON_RESET_DMA;
2916
2917 if (tmp & SDMA1_BUSY)
2918 reset_mask |= RADEON_RESET_DMA1;
2919
2920 /* SRBM_STATUS */
2921 tmp = RREG32(SRBM_STATUS);
2922
2923 if (tmp & IH_BUSY)
2924 reset_mask |= RADEON_RESET_IH;
2925
2926 if (tmp & SEM_BUSY)
2927 reset_mask |= RADEON_RESET_SEM;
2928
2929 if (tmp & GRBM_RQ_PENDING)
2930 reset_mask |= RADEON_RESET_GRBM;
2931
2932 if (tmp & VMC_BUSY)
2933 reset_mask |= RADEON_RESET_VMC;
2934
2935 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
2936 MCC_BUSY | MCD_BUSY))
2937 reset_mask |= RADEON_RESET_MC;
2938
2939 if (evergreen_is_display_hung(rdev))
2940 reset_mask |= RADEON_RESET_DISPLAY;
2941
2942 /* Skip MC reset as it's most likely not hung, just busy */
2943 if (reset_mask & RADEON_RESET_MC) {
2944 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
2945 reset_mask &= ~RADEON_RESET_MC;
2946 }
2947
2948 return reset_mask;
2949}
2950
2951/**
2952 * cik_gpu_soft_reset - soft reset GPU
2953 *
2954 * @rdev: radeon_device pointer
2955 * @reset_mask: mask of which blocks to reset
2956 *
2957 * Soft reset the blocks specified in @reset_mask.
2958 */
2959static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
2960{
2961 struct evergreen_mc_save save;
2962 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
2963 u32 tmp;
2964
2965 if (reset_mask == 0)
2966 return;
2967
2968 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
2969
2970 cik_print_gpu_status_regs(rdev);
2971 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
2972 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
2973 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
2974 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
2975
2976 /* stop the rlc */
2977 cik_rlc_stop(rdev);
2978
2979 /* Disable GFX parsing/prefetching */
2980 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2981
2982 /* Disable MEC parsing/prefetching */
2983 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2984
2985 if (reset_mask & RADEON_RESET_DMA) {
2986 /* sdma0 */
2987 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
2988 tmp |= SDMA_HALT;
2989 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
2990 }
2991 if (reset_mask & RADEON_RESET_DMA1) {
2992 /* sdma1 */
2993 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
2994 tmp |= SDMA_HALT;
2995 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
2996 }
2997
2998 evergreen_mc_stop(rdev, &save);
2999 if (evergreen_mc_wait_for_idle(rdev)) {
3000 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3001 }
3002
3003 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3004 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3005
3006 if (reset_mask & RADEON_RESET_CP) {
3007 grbm_soft_reset |= SOFT_RESET_CP;
3008
3009 srbm_soft_reset |= SOFT_RESET_GRBM;
3010 }
3011
3012 if (reset_mask & RADEON_RESET_DMA)
3013 srbm_soft_reset |= SOFT_RESET_SDMA;
3014
3015 if (reset_mask & RADEON_RESET_DMA1)
3016 srbm_soft_reset |= SOFT_RESET_SDMA1;
3017
3018 if (reset_mask & RADEON_RESET_DISPLAY)
3019 srbm_soft_reset |= SOFT_RESET_DC;
3020
3021 if (reset_mask & RADEON_RESET_RLC)
3022 grbm_soft_reset |= SOFT_RESET_RLC;
3023
3024 if (reset_mask & RADEON_RESET_SEM)
3025 srbm_soft_reset |= SOFT_RESET_SEM;
3026
3027 if (reset_mask & RADEON_RESET_IH)
3028 srbm_soft_reset |= SOFT_RESET_IH;
3029
3030 if (reset_mask & RADEON_RESET_GRBM)
3031 srbm_soft_reset |= SOFT_RESET_GRBM;
3032
3033 if (reset_mask & RADEON_RESET_VMC)
3034 srbm_soft_reset |= SOFT_RESET_VMC;
3035
3036 if (!(rdev->flags & RADEON_IS_IGP)) {
3037 if (reset_mask & RADEON_RESET_MC)
3038 srbm_soft_reset |= SOFT_RESET_MC;
3039 }
3040
3041 if (grbm_soft_reset) {
3042 tmp = RREG32(GRBM_SOFT_RESET);
3043 tmp |= grbm_soft_reset;
3044 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3045 WREG32(GRBM_SOFT_RESET, tmp);
3046 tmp = RREG32(GRBM_SOFT_RESET);
3047
3048 udelay(50);
3049
3050 tmp &= ~grbm_soft_reset;
3051 WREG32(GRBM_SOFT_RESET, tmp);
3052 tmp = RREG32(GRBM_SOFT_RESET);
3053 }
3054
3055 if (srbm_soft_reset) {
3056 tmp = RREG32(SRBM_SOFT_RESET);
3057 tmp |= srbm_soft_reset;
3058 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3059 WREG32(SRBM_SOFT_RESET, tmp);
3060 tmp = RREG32(SRBM_SOFT_RESET);
3061
3062 udelay(50);
3063
3064 tmp &= ~srbm_soft_reset;
3065 WREG32(SRBM_SOFT_RESET, tmp);
3066 tmp = RREG32(SRBM_SOFT_RESET);
3067 }
3068
3069 /* Wait a little for things to settle down */
3070 udelay(50);
3071
3072 evergreen_mc_resume(rdev, &save);
3073 udelay(50);
3074
3075 cik_print_gpu_status_regs(rdev);
3076}
3077
3078/**
3079 * cik_asic_reset - soft reset GPU
3080 *
3081 * @rdev: radeon_device pointer
3082 *
3083 * Look up which blocks are hung and attempt
3084 * to reset them.
3085 * Returns 0 for success.
3086 */
3087int cik_asic_reset(struct radeon_device *rdev)
3088{
3089 u32 reset_mask;
3090
3091 reset_mask = cik_gpu_check_soft_reset(rdev);
3092
3093 if (reset_mask)
3094 r600_set_bios_scratch_engine_hung(rdev, true);
3095
3096 cik_gpu_soft_reset(rdev, reset_mask);
3097
3098 reset_mask = cik_gpu_check_soft_reset(rdev);
3099
3100 if (!reset_mask)
3101 r600_set_bios_scratch_engine_hung(rdev, false);
3102
3103 return 0;
3104}
3105
3106/**
3107 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04003108 *
3109 * @rdev: radeon_device pointer
3110 * @ring: radeon_ring structure holding ring information
3111 *
3112 * Check if the 3D engine is locked up (CIK).
3113 * Returns true if the engine is locked, false if not.
3114 */
Alex Deuchercc066712013-04-09 12:59:51 -04003115bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04003116{
Alex Deuchercc066712013-04-09 12:59:51 -04003117 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04003118
Alex Deuchercc066712013-04-09 12:59:51 -04003119 if (!(reset_mask & (RADEON_RESET_GFX |
3120 RADEON_RESET_COMPUTE |
3121 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04003122 radeon_ring_lockup_update(ring);
3123 return false;
3124 }
3125 /* force CP activities */
3126 radeon_ring_force_activity(rdev, ring);
3127 return radeon_ring_test_lockup(rdev, ring);
3128}
3129
3130/**
Alex Deucher21a93e12013-04-09 12:47:11 -04003131 * cik_sdma_is_lockup - Check if the DMA engine is locked up
3132 *
3133 * @rdev: radeon_device pointer
3134 * @ring: radeon_ring structure holding ring information
3135 *
3136 * Check if the async DMA engine is locked up (CIK).
3137 * Returns true if the engine appears to be locked up, false if not.
3138 */
3139bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3140{
Alex Deuchercc066712013-04-09 12:59:51 -04003141 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3142 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04003143
3144 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04003145 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04003146 else
Alex Deuchercc066712013-04-09 12:59:51 -04003147 mask = RADEON_RESET_DMA1;
3148
3149 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04003150 radeon_ring_lockup_update(ring);
3151 return false;
3152 }
3153 /* force ring activities */
3154 radeon_ring_force_activity(rdev, ring);
3155 return radeon_ring_test_lockup(rdev, ring);
3156}
3157
Alex Deucher1c491652013-04-09 12:45:26 -04003158/* MC */
3159/**
3160 * cik_mc_program - program the GPU memory controller
3161 *
3162 * @rdev: radeon_device pointer
3163 *
3164 * Set the location of vram, gart, and AGP in the GPU's
3165 * physical address space (CIK).
3166 */
3167static void cik_mc_program(struct radeon_device *rdev)
3168{
3169 struct evergreen_mc_save save;
3170 u32 tmp;
3171 int i, j;
3172
3173 /* Initialize HDP */
3174 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3175 WREG32((0x2c14 + j), 0x00000000);
3176 WREG32((0x2c18 + j), 0x00000000);
3177 WREG32((0x2c1c + j), 0x00000000);
3178 WREG32((0x2c20 + j), 0x00000000);
3179 WREG32((0x2c24 + j), 0x00000000);
3180 }
3181 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3182
3183 evergreen_mc_stop(rdev, &save);
3184 if (radeon_mc_wait_for_idle(rdev)) {
3185 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3186 }
3187 /* Lockout access through VGA aperture*/
3188 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3189 /* Update configuration */
3190 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3191 rdev->mc.vram_start >> 12);
3192 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3193 rdev->mc.vram_end >> 12);
3194 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3195 rdev->vram_scratch.gpu_addr >> 12);
3196 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3197 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3198 WREG32(MC_VM_FB_LOCATION, tmp);
3199 /* XXX double check these! */
3200 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3201 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3202 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3203 WREG32(MC_VM_AGP_BASE, 0);
3204 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3205 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3206 if (radeon_mc_wait_for_idle(rdev)) {
3207 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3208 }
3209 evergreen_mc_resume(rdev, &save);
3210 /* we need to own VRAM, so turn off the VGA renderer here
3211 * to stop it overwriting our objects */
3212 rv515_vga_render_disable(rdev);
3213}
3214
3215/**
3216 * cik_mc_init - initialize the memory controller driver params
3217 *
3218 * @rdev: radeon_device pointer
3219 *
3220 * Look up the amount of vram, vram width, and decide how to place
3221 * vram and gart within the GPU's physical address space (CIK).
3222 * Returns 0 for success.
3223 */
3224static int cik_mc_init(struct radeon_device *rdev)
3225{
3226 u32 tmp;
3227 int chansize, numchan;
3228
3229 /* Get VRAM information */
3230 rdev->mc.vram_is_ddr = true;
3231 tmp = RREG32(MC_ARB_RAMCFG);
3232 if (tmp & CHANSIZE_MASK) {
3233 chansize = 64;
3234 } else {
3235 chansize = 32;
3236 }
3237 tmp = RREG32(MC_SHARED_CHMAP);
3238 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3239 case 0:
3240 default:
3241 numchan = 1;
3242 break;
3243 case 1:
3244 numchan = 2;
3245 break;
3246 case 2:
3247 numchan = 4;
3248 break;
3249 case 3:
3250 numchan = 8;
3251 break;
3252 case 4:
3253 numchan = 3;
3254 break;
3255 case 5:
3256 numchan = 6;
3257 break;
3258 case 6:
3259 numchan = 10;
3260 break;
3261 case 7:
3262 numchan = 12;
3263 break;
3264 case 8:
3265 numchan = 16;
3266 break;
3267 }
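	/* total bus width = number of channels * bits per channel,
	 * e.g. 4 channels of 64 bits give a 256-bit interface
	 */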
3268 rdev->mc.vram_width = numchan * chansize;
3269 /* Could aper size report 0 ? */
3270 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3271 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3272 /* size in MB */
3273 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3274 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3275 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3276 si_vram_gtt_location(rdev, &rdev->mc);
3277 radeon_update_bandwidth_info(rdev);
3278
3279 return 0;
3280}
3281
3282/*
3283 * GART
3284 * VMID 0 is the physical GPU address space as used by the kernel.
3285 * VMIDs 1-15 are used for userspace clients and are handled
3286 * by the radeon vm/hsa code.
3287 */
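/*
 * Illustrative sketch, not part of the driver: the per-VMID page table
 * base registers are split across two banks, as programmed by
 * cik_pcie_gart_enable() and cik_vm_flush() below.  The helper name is
 * local to this example.
 */
static inline u32 cik_example_vm_pt_base_reg(unsigned vmid)
{
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	else
		return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}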
3288/**
3289 * cik_pcie_gart_tlb_flush - gart tlb flush callback
3290 *
3291 * @rdev: radeon_device pointer
3292 *
3293 * Flush the TLB for the VMID 0 page table (CIK).
3294 */
3295void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3296{
3297 /* flush hdp cache */
3298 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3299
3300 /* bits 0-15 are the VM contexts0-15 */
3301 WREG32(VM_INVALIDATE_REQUEST, 0x1);
3302}
3303
3304/**
3305 * cik_pcie_gart_enable - gart enable
3306 *
3307 * @rdev: radeon_device pointer
3308 *
3309 * This sets up the TLBs, programs the page tables for VMID0,
3310 * sets up the hw for VMIDs 1-15 which are allocated on
3311 * demand, and sets up the global locations for the LDS, GDS,
3312 * and GPUVM for FSA64 clients (CIK).
3313 * Returns 0 for success, errors for failure.
3314 */
3315static int cik_pcie_gart_enable(struct radeon_device *rdev)
3316{
3317 int r, i;
3318
3319 if (rdev->gart.robj == NULL) {
3320 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3321 return -EINVAL;
3322 }
3323 r = radeon_gart_table_vram_pin(rdev);
3324 if (r)
3325 return r;
3326 radeon_gart_restore(rdev);
3327 /* Setup TLB control */
3328 WREG32(MC_VM_MX_L1_TLB_CNTL,
3329 (0xA << 7) |
3330 ENABLE_L1_TLB |
3331 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3332 ENABLE_ADVANCED_DRIVER_MODEL |
3333 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3334 /* Setup L2 cache */
3335 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3336 ENABLE_L2_FRAGMENT_PROCESSING |
3337 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3338 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3339 EFFECTIVE_L2_QUEUE_SIZE(7) |
3340 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3341 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3342 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3343 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3344 /* setup context0 */
3345 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3346 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3347 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3348 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3349 (u32)(rdev->dummy_page.addr >> 12));
3350 WREG32(VM_CONTEXT0_CNTL2, 0);
3351 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3352 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3353
3354 WREG32(0x15D4, 0);
3355 WREG32(0x15D8, 0);
3356 WREG32(0x15DC, 0);
3357
3358 /* empty context1-15 */
3359 /* FIXME start with 4G, once using 2 level pt switch to full
3360 * vm size space
3361 */
3362 /* set vm size, must be a multiple of 4 */
3363 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3364 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3365 for (i = 1; i < 16; i++) {
3366 if (i < 8)
3367 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3368 rdev->gart.table_addr >> 12);
3369 else
3370 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3371 rdev->gart.table_addr >> 12);
3372 }
3373
3374 /* enable context1-15 */
3375 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3376 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04003377 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04003378 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04003379 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3380 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3381 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3382 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3383 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3384 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3385 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3386 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3387 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3388 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3389 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3390 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04003391
3392 /* TC cache setup ??? */
3393 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3394 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3395 WREG32(TC_CFG_L1_STORE_POLICY, 0);
3396
3397 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3398 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3399 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3400 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3401 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3402
3403 WREG32(TC_CFG_L1_VOLATILE, 0);
3404 WREG32(TC_CFG_L2_VOLATILE, 0);
3405
3406 if (rdev->family == CHIP_KAVERI) {
3407 u32 tmp = RREG32(CHUB_CONTROL);
3408 tmp &= ~BYPASS_VM;
3409 WREG32(CHUB_CONTROL, tmp);
3410 }
3411
3412 /* XXX SH_MEM regs */
3413 /* where to put LDS, scratch, GPUVM in FSA64 space */
3414 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05003415 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04003416 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04003417 WREG32(SH_MEM_CONFIG, 0);
3418 WREG32(SH_MEM_APE1_BASE, 1);
3419 WREG32(SH_MEM_APE1_LIMIT, 0);
3420 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04003421 /* SDMA GFX */
3422 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3423 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3424 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3425 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3426 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04003427 }
Alex Deucherb556b122013-01-29 10:44:22 -05003428 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucher1c491652013-04-09 12:45:26 -04003429
3430 cik_pcie_gart_tlb_flush(rdev);
3431 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3432 (unsigned)(rdev->mc.gtt_size >> 20),
3433 (unsigned long long)rdev->gart.table_addr);
3434 rdev->gart.ready = true;
3435 return 0;
3436}
3437
3438/**
3439 * cik_pcie_gart_disable - gart disable
3440 *
3441 * @rdev: radeon_device pointer
3442 *
3443 * This disables all VM page tables (CIK).
3444 */
3445static void cik_pcie_gart_disable(struct radeon_device *rdev)
3446{
3447 /* Disable all tables */
3448 WREG32(VM_CONTEXT0_CNTL, 0);
3449 WREG32(VM_CONTEXT1_CNTL, 0);
3450 /* Setup TLB control */
3451 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3452 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3453 /* Setup L2 cache */
3454 WREG32(VM_L2_CNTL,
3455 ENABLE_L2_FRAGMENT_PROCESSING |
3456 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3457 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3458 EFFECTIVE_L2_QUEUE_SIZE(7) |
3459 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3460 WREG32(VM_L2_CNTL2, 0);
3461 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3462 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3463 radeon_gart_table_vram_unpin(rdev);
3464}
3465
3466/**
3467 * cik_pcie_gart_fini - vm fini callback
3468 *
3469 * @rdev: radeon_device pointer
3470 *
3471 * Tears down the driver GART/VM setup (CIK).
3472 */
3473static void cik_pcie_gart_fini(struct radeon_device *rdev)
3474{
3475 cik_pcie_gart_disable(rdev);
3476 radeon_gart_table_vram_free(rdev);
3477 radeon_gart_fini(rdev);
3478}
3479
3480/* vm parser */
3481/**
3482 * cik_ib_parse - vm ib_parse callback
3483 *
3484 * @rdev: radeon_device pointer
3485 * @ib: indirect buffer pointer
3486 *
3487 * CIK uses hw IB checking so this is a nop (CIK).
3488 */
3489int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3490{
3491 return 0;
3492}
3493
3494/*
3495 * vm
3496 * VMID 0 covers the physical GPU addresses used by the kernel.
3497 * VMIDs 1-15 are used for userspace clients and are handled
3498 * by the radeon vm/hsa code.
3499 */
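/* Illustrative sketch only (the helper name below is made up; the driver
 * open-codes this): the per-VMID page table base registers are split
 * across two banks, VM_CONTEXT0..7 and VM_CONTEXT8..15, which is why
 * cik_vm_flush() and cik_dma_vm_flush() further down both special-case
 * vm->id < 8.
 */
static inline u32 cik_vm_pt_base_reg(unsigned int vm_id)
{
	if (vm_id < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2);
}
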
3500/**
3501 * cik_vm_init - cik vm init callback
3502 *
3503 * @rdev: radeon_device pointer
3504 *
3505 * Inits cik specific vm parameters (number of VMs, base of vram for
3506 * VMIDs 1-15) (CIK).
3507 * Returns 0 for success.
3508 */
3509int cik_vm_init(struct radeon_device *rdev)
3510{
3511 /* number of VMs */
3512 rdev->vm_manager.nvm = 16;
3513 /* base offset of vram pages */
3514 if (rdev->flags & RADEON_IS_IGP) {
3515 u64 tmp = RREG32(MC_VM_FB_OFFSET);
3516 tmp <<= 22;
3517 rdev->vm_manager.vram_base_offset = tmp;
3518 } else
3519 rdev->vm_manager.vram_base_offset = 0;
3520
3521 return 0;
3522}
3523
3524/**
3525 * cik_vm_fini - cik vm fini callback
3526 *
3527 * @rdev: radeon_device pointer
3528 *
3529 * Tear down any asic specific VM setup (CIK).
3530 */
3531void cik_vm_fini(struct radeon_device *rdev)
3532{
3533}
3534
Alex Deucherf96ab482012-08-31 10:37:47 -04003535/**
3536 * cik_vm_flush - cik vm flush using the CP
3537 *
3538 * @rdev: radeon_device pointer
3539 *
3540 * Update the page table base and flush the VM TLB
3541 * using the CP (CIK).
3542 */
3543void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3544{
3545 struct radeon_ring *ring = &rdev->ring[ridx];
3546
3547 if (vm == NULL)
3548 return;
3549
3550 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3551 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3552 WRITE_DATA_DST_SEL(0)));
3553 if (vm->id < 8) {
3554 radeon_ring_write(ring,
3555 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3556 } else {
3557 radeon_ring_write(ring,
3558 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3559 }
3560 radeon_ring_write(ring, 0);
3561 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3562
3563 /* update SH_MEM_* regs */
3564 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3565 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3566 WRITE_DATA_DST_SEL(0)));
3567 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3568 radeon_ring_write(ring, 0);
3569 radeon_ring_write(ring, VMID(vm->id));
3570
3571 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
3572 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3573 WRITE_DATA_DST_SEL(0)));
3574 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3575 radeon_ring_write(ring, 0);
3576
3577 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
3578 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
3579 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
3580 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
3581
3582 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3583 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3584 WRITE_DATA_DST_SEL(0)));
3585 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3586 radeon_ring_write(ring, 0);
3587 radeon_ring_write(ring, VMID(0));
3588
3589 /* HDP flush */
3590 /* We should be using the WAIT_REG_MEM packet here like in
3591 * cik_fence_ring_emit(), but it causes the CP to hang in this
3592 * context...
3593 */
3594 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3595 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3596 WRITE_DATA_DST_SEL(0)));
3597 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3598 radeon_ring_write(ring, 0);
3599 radeon_ring_write(ring, 0);
3600
3601 /* bits 0-15 are the VM contexts0-15 */
3602 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3603 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3604 WRITE_DATA_DST_SEL(0)));
3605 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3606 radeon_ring_write(ring, 0);
3607 radeon_ring_write(ring, 1 << vm->id);
3608
3609 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3610 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3611 radeon_ring_write(ring, 0x0);
3612}
3613
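/* Minimal sketch, for illustration only (the helper name is made up and
 * the driver open-codes this): every register write emitted by
 * cik_vm_flush() above uses the same 5-dword WRITE_DATA form - packet
 * header, engine/destination select, register dword offset, upper
 * address (0 for registers), then the value.
 */
static inline void cik_cp_emit_wreg(struct radeon_ring *ring, u32 reg, u32 val)
{
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, reg >> 2);	/* dword offset of the register */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, val);
}
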
Alex Deucher605de6b2012-10-22 13:04:03 -04003614/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04003615 * cik_vm_set_page - update the page tables using CP or sDMA
3616 *
3617 * @rdev: radeon_device pointer
3618 * @ib: indirect buffer to fill with commands
3619 * @pe: addr of the page entry
3620 * @addr: dst addr to write into pe
3621 * @count: number of page entries to update
3622 * @incr: increase next addr by incr bytes
3623 * @flags: access flags
3624 *
3625 * Update the page tables using CP or sDMA (CIK).
3626 */
3627void cik_vm_set_page(struct radeon_device *rdev,
3628 struct radeon_ib *ib,
3629 uint64_t pe,
3630 uint64_t addr, unsigned count,
3631 uint32_t incr, uint32_t flags)
3632{
3633 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3634 uint64_t value;
3635 unsigned ndw;
3636
3637 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3638 /* CP */
3639 while (count) {
3640 ndw = 2 + count * 2;
3641 if (ndw > 0x3FFE)
3642 ndw = 0x3FFE;
3643
3644 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3645 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3646 WRITE_DATA_DST_SEL(1));
3647 ib->ptr[ib->length_dw++] = pe;
3648 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3649 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3650 if (flags & RADEON_VM_PAGE_SYSTEM) {
3651 value = radeon_vm_map_gart(rdev, addr);
3652 value &= 0xFFFFFFFFFFFFF000ULL;
3653 } else if (flags & RADEON_VM_PAGE_VALID) {
3654 value = addr;
3655 } else {
3656 value = 0;
3657 }
3658 addr += incr;
3659 value |= r600_flags;
3660 ib->ptr[ib->length_dw++] = value;
3661 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3662 }
3663 }
3664 } else {
3665 /* DMA */
3666 if (flags & RADEON_VM_PAGE_SYSTEM) {
3667 while (count) {
3668 ndw = count * 2;
3669 if (ndw > 0xFFFFE)
3670 ndw = 0xFFFFE;
3671
3672 /* for non-physically contiguous pages (system) */
3673 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3674 ib->ptr[ib->length_dw++] = pe;
3675 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3676 ib->ptr[ib->length_dw++] = ndw;
3677 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3678 if (flags & RADEON_VM_PAGE_SYSTEM) {
3679 value = radeon_vm_map_gart(rdev, addr);
3680 value &= 0xFFFFFFFFFFFFF000ULL;
3681 } else if (flags & RADEON_VM_PAGE_VALID) {
3682 value = addr;
3683 } else {
3684 value = 0;
3685 }
3686 addr += incr;
3687 value |= r600_flags;
3688 ib->ptr[ib->length_dw++] = value;
3689 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3690 }
3691 }
3692 } else {
3693 while (count) {
3694 ndw = count;
3695 if (ndw > 0x7FFFF)
3696 ndw = 0x7FFFF;
3697
3698 if (flags & RADEON_VM_PAGE_VALID)
3699 value = addr;
3700 else
3701 value = 0;
3702 /* for physically contiguous pages (vram) */
3703 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
3704 ib->ptr[ib->length_dw++] = pe; /* dst addr */
3705 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3706 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3707 ib->ptr[ib->length_dw++] = 0;
3708 ib->ptr[ib->length_dw++] = value; /* value */
3709 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3710 ib->ptr[ib->length_dw++] = incr; /* increment size */
3711 ib->ptr[ib->length_dw++] = 0;
3712 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
3713 pe += ndw * 8;
3714 addr += ndw * incr;
3715 count -= ndw;
3716 }
3717 }
3718 while (ib->length_dw & 0x7)
3719 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
3720 }
3721}
3722
3723/**
Alex Deucher605de6b2012-10-22 13:04:03 -04003724 * cik_dma_vm_flush - cik vm flush using sDMA
3725 *
3726 * @rdev: radeon_device pointer
3727 *
3728 * Update the page table base and flush the VM TLB
3729 * using sDMA (CIK).
3730 */
3731void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3732{
3733 struct radeon_ring *ring = &rdev->ring[ridx];
3734 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3735 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3736 u32 ref_and_mask;
3737
3738 if (vm == NULL)
3739 return;
3740
3741 if (ridx == R600_RING_TYPE_DMA_INDEX)
3742 ref_and_mask = SDMA0;
3743 else
3744 ref_and_mask = SDMA1;
3745
3746 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3747 if (vm->id < 8) {
3748 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3749 } else {
3750 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3751 }
3752 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3753
3754 /* update SH_MEM_* regs */
3755 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3756 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3757 radeon_ring_write(ring, VMID(vm->id));
3758
3759 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3760 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3761 radeon_ring_write(ring, 0);
3762
3763 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3764 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
3765 radeon_ring_write(ring, 0);
3766
3767 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3768 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
3769 radeon_ring_write(ring, 1);
3770
3771 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3772 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
3773 radeon_ring_write(ring, 0);
3774
3775 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3776 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3777 radeon_ring_write(ring, VMID(0));
3778
3779 /* flush HDP */
3780 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3781 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3782 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3783 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3784 radeon_ring_write(ring, ref_and_mask); /* MASK */
3785 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3786
3787 /* flush TLB */
3788 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3789 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3790 radeon_ring_write(ring, 1 << vm->id);
3791}
3792
Alex Deucherf6796ca2012-11-09 10:44:08 -05003793/*
3794 * RLC
3795 * The RLC is a multi-purpose microengine that handles a
3796 * variety of functions, the most important of which is
3797 * the interrupt controller.
3798 */
3799/**
3800 * cik_rlc_stop - stop the RLC ME
3801 *
3802 * @rdev: radeon_device pointer
3803 *
3804 * Halt the RLC ME (MicroEngine) (CIK).
3805 */
3806static void cik_rlc_stop(struct radeon_device *rdev)
3807{
3808 int i, j, k;
3809 u32 mask, tmp;
3810
3811 tmp = RREG32(CP_INT_CNTL_RING0);
3812 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3813 WREG32(CP_INT_CNTL_RING0, tmp);
3814
3815 RREG32(CB_CGTT_SCLK_CTRL);
3816 RREG32(CB_CGTT_SCLK_CTRL);
3817 RREG32(CB_CGTT_SCLK_CTRL);
3818 RREG32(CB_CGTT_SCLK_CTRL);
3819
3820 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3821 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
3822
3823 WREG32(RLC_CNTL, 0);
3824
3825 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3826 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3827 cik_select_se_sh(rdev, i, j);
3828 for (k = 0; k < rdev->usec_timeout; k++) {
3829 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
3830 break;
3831 udelay(1);
3832 }
3833 }
3834 }
3835 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3836
3837 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
3838 for (k = 0; k < rdev->usec_timeout; k++) {
3839 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3840 break;
3841 udelay(1);
3842 }
3843}
3844
3845/**
3846 * cik_rlc_start - start the RLC ME
3847 *
3848 * @rdev: radeon_device pointer
3849 *
3850 * Unhalt the RLC ME (MicroEngine) (CIK).
3851 */
3852static void cik_rlc_start(struct radeon_device *rdev)
3853{
3854 u32 tmp;
3855
3856 WREG32(RLC_CNTL, RLC_ENABLE);
3857
3858 tmp = RREG32(CP_INT_CNTL_RING0);
3859 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3860 WREG32(CP_INT_CNTL_RING0, tmp);
3861
3862 udelay(50);
3863}
3864
3865/**
3866 * cik_rlc_resume - setup the RLC hw
3867 *
3868 * @rdev: radeon_device pointer
3869 *
3870 * Initialize the RLC registers, load the ucode,
3871 * and start the RLC (CIK).
3872 * Returns 0 for success, -EINVAL if the ucode is not available.
3873 */
3874static int cik_rlc_resume(struct radeon_device *rdev)
3875{
3876 u32 i, size;
3877 u32 clear_state_info[3];
3878 const __be32 *fw_data;
3879
3880 if (!rdev->rlc_fw)
3881 return -EINVAL;
3882
3883 switch (rdev->family) {
3884 case CHIP_BONAIRE:
3885 default:
3886 size = BONAIRE_RLC_UCODE_SIZE;
3887 break;
3888 case CHIP_KAVERI:
3889 size = KV_RLC_UCODE_SIZE;
3890 break;
3891 case CHIP_KABINI:
3892 size = KB_RLC_UCODE_SIZE;
3893 break;
3894 }
3895
3896 cik_rlc_stop(rdev);
3897
3898 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
3899 RREG32(GRBM_SOFT_RESET);
3900 udelay(50);
3901 WREG32(GRBM_SOFT_RESET, 0);
3902 RREG32(GRBM_SOFT_RESET);
3903 udelay(50);
3904
3905 WREG32(RLC_LB_CNTR_INIT, 0);
3906 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
3907
3908 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3909 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
3910 WREG32(RLC_LB_PARAMS, 0x00600408);
3911 WREG32(RLC_LB_CNTL, 0x80000004);
3912
3913 WREG32(RLC_MC_CNTL, 0);
3914 WREG32(RLC_UCODE_CNTL, 0);
3915
3916 fw_data = (const __be32 *)rdev->rlc_fw->data;
3917 WREG32(RLC_GPM_UCODE_ADDR, 0);
3918 for (i = 0; i < size; i++)
3919 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
3920 WREG32(RLC_GPM_UCODE_ADDR, 0);
3921
3922 /* XXX */
3923 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
3924 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
3925 clear_state_info[2] = 0;//cik_default_size;
3926 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
3927 for (i = 0; i < 3; i++)
3928 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
3929 WREG32(RLC_DRIVER_DMA_STATUS, 0);
3930
3931 cik_rlc_start(rdev);
3932
3933 return 0;
3934}
Alex Deuchera59781b2012-11-09 10:45:57 -05003935
3936/*
3937 * Interrupts
3938 * Starting with r6xx, interrupts are handled via a ring buffer.
3939 * Ring buffers are areas of GPU accessible memory that the GPU
3940 * writes interrupt vectors into and the host reads vectors out of.
3941 * There is a rptr (read pointer) that determines where the
3942 * host is currently reading, and a wptr (write pointer)
3943 * which determines where the GPU has written. When the
3944 * pointers are equal, the ring is idle. When the GPU
3945 * writes vectors to the ring buffer, it increments the
3946 * wptr. When there is an interrupt, the host then starts
3947 * fetching commands and processing them until the pointers are
3948 * equal again at which point it updates the rptr.
3949 */
3950
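/* Simplified sketch of the rptr/wptr model described above (assumption:
 * the helper name is illustrative; the real consumer, including overflow
 * handling, is cik_irq_process() below).  Each IV entry is 16 bytes, so
 * both pointers advance in steps of 16 and wrap via the ring mask.
 */
static inline bool cik_ih_ring_idle(struct radeon_device *rdev, u32 wptr)
{
	/* the ring is idle when the host has caught up with the GPU */
	return rdev->ih.rptr == (wptr & rdev->ih.ptr_mask);
}
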
3951/**
3952 * cik_enable_interrupts - Enable the interrupt ring buffer
3953 *
3954 * @rdev: radeon_device pointer
3955 *
3956 * Enable the interrupt ring buffer (CIK).
3957 */
3958static void cik_enable_interrupts(struct radeon_device *rdev)
3959{
3960 u32 ih_cntl = RREG32(IH_CNTL);
3961 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3962
3963 ih_cntl |= ENABLE_INTR;
3964 ih_rb_cntl |= IH_RB_ENABLE;
3965 WREG32(IH_CNTL, ih_cntl);
3966 WREG32(IH_RB_CNTL, ih_rb_cntl);
3967 rdev->ih.enabled = true;
3968}
3969
3970/**
3971 * cik_disable_interrupts - Disable the interrupt ring buffer
3972 *
3973 * @rdev: radeon_device pointer
3974 *
3975 * Disable the interrupt ring buffer (CIK).
3976 */
3977static void cik_disable_interrupts(struct radeon_device *rdev)
3978{
3979 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3980 u32 ih_cntl = RREG32(IH_CNTL);
3981
3982 ih_rb_cntl &= ~IH_RB_ENABLE;
3983 ih_cntl &= ~ENABLE_INTR;
3984 WREG32(IH_RB_CNTL, ih_rb_cntl);
3985 WREG32(IH_CNTL, ih_cntl);
3986 /* set rptr, wptr to 0 */
3987 WREG32(IH_RB_RPTR, 0);
3988 WREG32(IH_RB_WPTR, 0);
3989 rdev->ih.enabled = false;
3990 rdev->ih.rptr = 0;
3991}
3992
3993/**
3994 * cik_disable_interrupt_state - Disable all interrupt sources
3995 *
3996 * @rdev: radeon_device pointer
3997 *
3998 * Clear all interrupt enable bits used by the driver (CIK).
3999 */
4000static void cik_disable_interrupt_state(struct radeon_device *rdev)
4001{
4002 u32 tmp;
4003
4004 /* gfx ring */
4005 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04004006 /* sdma */
4007 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4008 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4009 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4010 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05004011 /* compute queues */
4012 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4013 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4014 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4015 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4016 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4017 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4018 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4019 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4020 /* grbm */
4021 WREG32(GRBM_INT_CNTL, 0);
4022 /* vline/vblank, etc. */
4023 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4024 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4025 if (rdev->num_crtc >= 4) {
4026 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4027 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4028 }
4029 if (rdev->num_crtc >= 6) {
4030 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4031 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4032 }
4033
4034 /* dac hotplug */
4035 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4036
4037 /* digital hotplug */
4038 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4039 WREG32(DC_HPD1_INT_CONTROL, tmp);
4040 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4041 WREG32(DC_HPD2_INT_CONTROL, tmp);
4042 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4043 WREG32(DC_HPD3_INT_CONTROL, tmp);
4044 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4045 WREG32(DC_HPD4_INT_CONTROL, tmp);
4046 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4047 WREG32(DC_HPD5_INT_CONTROL, tmp);
4048 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4049 WREG32(DC_HPD6_INT_CONTROL, tmp);
4050
4051}
4052
4053/**
4054 * cik_irq_init - init and enable the interrupt ring
4055 *
4056 * @rdev: radeon_device pointer
4057 *
4058 * Allocate a ring buffer for the interrupt controller,
4059 * enable the RLC, disable interrupts, set up the IH
4060 * ring buffer, and enable it (CIK).
4061 * Called at device load and resume.
4062 * Returns 0 for success, errors for failure.
4063 */
4064static int cik_irq_init(struct radeon_device *rdev)
4065{
4066 int ret = 0;
4067 int rb_bufsz;
4068 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4069
4070 /* allocate ring */
4071 ret = r600_ih_ring_alloc(rdev);
4072 if (ret)
4073 return ret;
4074
4075 /* disable irqs */
4076 cik_disable_interrupts(rdev);
4077
4078 /* init rlc */
4079 ret = cik_rlc_resume(rdev);
4080 if (ret) {
4081 r600_ih_ring_fini(rdev);
4082 return ret;
4083 }
4084
4085 /* setup interrupt control */
4086 /* XXX this should actually be a bus address, not an MC address. same on older asics */
4087 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4088 interrupt_cntl = RREG32(INTERRUPT_CNTL);
4089 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4090 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4091 */
4092 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4093 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4094 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4095 WREG32(INTERRUPT_CNTL, interrupt_cntl);
4096
4097 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
4098 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4099
4100 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4101 IH_WPTR_OVERFLOW_CLEAR |
4102 (rb_bufsz << 1));
4103
4104 if (rdev->wb.enabled)
4105 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4106
4107 /* set the writeback address whether it's enabled or not */
4108 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4109 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4110
4111 WREG32(IH_RB_CNTL, ih_rb_cntl);
4112
4113 /* set rptr, wptr to 0 */
4114 WREG32(IH_RB_RPTR, 0);
4115 WREG32(IH_RB_WPTR, 0);
4116
4117 /* Default settings for IH_CNTL (disabled at first) */
4118 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4119 /* RPTR_REARM only works if msi's are enabled */
4120 if (rdev->msi_enabled)
4121 ih_cntl |= RPTR_REARM;
4122 WREG32(IH_CNTL, ih_cntl);
4123
4124 /* force the active interrupt state to all disabled */
4125 cik_disable_interrupt_state(rdev);
4126
4127 pci_set_master(rdev->pdev);
4128
4129 /* enable irqs */
4130 cik_enable_interrupts(rdev);
4131
4132 return ret;
4133}
4134
4135/**
4136 * cik_irq_set - enable/disable interrupt sources
4137 *
4138 * @rdev: radeon_device pointer
4139 *
4140 * Enable interrupt sources on the GPU (vblanks, hpd,
4141 * etc.) (CIK).
4142 * Returns 0 for success, errors for failure.
4143 */
4144int cik_irq_set(struct radeon_device *rdev)
4145{
4146 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
4147 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
4148 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4149 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4150 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04004151 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05004152
4153 if (!rdev->irq.installed) {
4154 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4155 return -EINVAL;
4156 }
4157 /* don't enable anything if the ih is disabled */
4158 if (!rdev->ih.enabled) {
4159 cik_disable_interrupts(rdev);
4160 /* force the active interrupt state to all disabled */
4161 cik_disable_interrupt_state(rdev);
4162 return 0;
4163 }
4164
4165 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4166 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4167 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4168 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4169 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4170 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4171
Alex Deucher21a93e12013-04-09 12:47:11 -04004172 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4173 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4174
Alex Deuchera59781b2012-11-09 10:45:57 -05004175 /* enable CP interrupts on all rings */
4176 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4177 DRM_DEBUG("cik_irq_set: sw int gfx\n");
4178 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4179 }
4180 /* TODO: compute queues! */
4181 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
4182
Alex Deucher21a93e12013-04-09 12:47:11 -04004183 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4184 DRM_DEBUG("cik_irq_set: sw int dma\n");
4185 dma_cntl |= TRAP_ENABLE;
4186 }
4187
4188 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4189 DRM_DEBUG("cik_irq_set: sw int dma1\n");
4190 dma_cntl1 |= TRAP_ENABLE;
4191 }
4192
Alex Deuchera59781b2012-11-09 10:45:57 -05004193 if (rdev->irq.crtc_vblank_int[0] ||
4194 atomic_read(&rdev->irq.pflip[0])) {
4195 DRM_DEBUG("cik_irq_set: vblank 0\n");
4196 crtc1 |= VBLANK_INTERRUPT_MASK;
4197 }
4198 if (rdev->irq.crtc_vblank_int[1] ||
4199 atomic_read(&rdev->irq.pflip[1])) {
4200 DRM_DEBUG("cik_irq_set: vblank 1\n");
4201 crtc2 |= VBLANK_INTERRUPT_MASK;
4202 }
4203 if (rdev->irq.crtc_vblank_int[2] ||
4204 atomic_read(&rdev->irq.pflip[2])) {
4205 DRM_DEBUG("cik_irq_set: vblank 2\n");
4206 crtc3 |= VBLANK_INTERRUPT_MASK;
4207 }
4208 if (rdev->irq.crtc_vblank_int[3] ||
4209 atomic_read(&rdev->irq.pflip[3])) {
4210 DRM_DEBUG("cik_irq_set: vblank 3\n");
4211 crtc4 |= VBLANK_INTERRUPT_MASK;
4212 }
4213 if (rdev->irq.crtc_vblank_int[4] ||
4214 atomic_read(&rdev->irq.pflip[4])) {
4215 DRM_DEBUG("cik_irq_set: vblank 4\n");
4216 crtc5 |= VBLANK_INTERRUPT_MASK;
4217 }
4218 if (rdev->irq.crtc_vblank_int[5] ||
4219 atomic_read(&rdev->irq.pflip[5])) {
4220 DRM_DEBUG("cik_irq_set: vblank 5\n");
4221 crtc6 |= VBLANK_INTERRUPT_MASK;
4222 }
4223 if (rdev->irq.hpd[0]) {
4224 DRM_DEBUG("cik_irq_set: hpd 1\n");
4225 hpd1 |= DC_HPDx_INT_EN;
4226 }
4227 if (rdev->irq.hpd[1]) {
4228 DRM_DEBUG("cik_irq_set: hpd 2\n");
4229 hpd2 |= DC_HPDx_INT_EN;
4230 }
4231 if (rdev->irq.hpd[2]) {
4232 DRM_DEBUG("cik_irq_set: hpd 3\n");
4233 hpd3 |= DC_HPDx_INT_EN;
4234 }
4235 if (rdev->irq.hpd[3]) {
4236 DRM_DEBUG("cik_irq_set: hpd 4\n");
4237 hpd4 |= DC_HPDx_INT_EN;
4238 }
4239 if (rdev->irq.hpd[4]) {
4240 DRM_DEBUG("cik_irq_set: hpd 5\n");
4241 hpd5 |= DC_HPDx_INT_EN;
4242 }
4243 if (rdev->irq.hpd[5]) {
4244 DRM_DEBUG("cik_irq_set: hpd 6\n");
4245 hpd6 |= DC_HPDx_INT_EN;
4246 }
4247
4248 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4249
Alex Deucher21a93e12013-04-09 12:47:11 -04004250 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4251 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4252
Alex Deuchera59781b2012-11-09 10:45:57 -05004253 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4254
4255 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4256 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4257 if (rdev->num_crtc >= 4) {
4258 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4259 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4260 }
4261 if (rdev->num_crtc >= 6) {
4262 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4263 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4264 }
4265
4266 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4267 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4268 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4269 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4270 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4271 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4272
4273 return 0;
4274}
4275
4276/**
4277 * cik_irq_ack - ack interrupt sources
4278 *
4279 * @rdev: radeon_device pointer
4280 *
4281 * Ack interrupt sources on the GPU (vblanks, hpd,
4282 * etc.) (CIK). Certain interrupt sources are sw
4283 * generated and do not require an explicit ack.
4284 */
4285static inline void cik_irq_ack(struct radeon_device *rdev)
4286{
4287 u32 tmp;
4288
4289 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4290 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4291 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4292 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4293 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4294 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4295 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4296
4297 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4298 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4299 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4300 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4301 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4302 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4303 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4304 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4305
4306 if (rdev->num_crtc >= 4) {
4307 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4308 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4309 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4310 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4311 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4312 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4313 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4314 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4315 }
4316
4317 if (rdev->num_crtc >= 6) {
4318 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4319 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4320 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4321 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4322 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4323 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4324 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4325 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4326 }
4327
4328 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4329 tmp = RREG32(DC_HPD1_INT_CONTROL);
4330 tmp |= DC_HPDx_INT_ACK;
4331 WREG32(DC_HPD1_INT_CONTROL, tmp);
4332 }
4333 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4334 tmp = RREG32(DC_HPD2_INT_CONTROL);
4335 tmp |= DC_HPDx_INT_ACK;
4336 WREG32(DC_HPD2_INT_CONTROL, tmp);
4337 }
4338 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4339 tmp = RREG32(DC_HPD3_INT_CONTROL);
4340 tmp |= DC_HPDx_INT_ACK;
4341 WREG32(DC_HPD3_INT_CONTROL, tmp);
4342 }
4343 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4344 tmp = RREG32(DC_HPD4_INT_CONTROL);
4345 tmp |= DC_HPDx_INT_ACK;
4346 WREG32(DC_HPD4_INT_CONTROL, tmp);
4347 }
4348 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4349 tmp = RREG32(DC_HPD5_INT_CONTROL);
4350 tmp |= DC_HPDx_INT_ACK;
4351 WREG32(DC_HPD5_INT_CONTROL, tmp);
4352 }
4353 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4354		tmp = RREG32(DC_HPD6_INT_CONTROL);
4355 tmp |= DC_HPDx_INT_ACK;
4356 WREG32(DC_HPD6_INT_CONTROL, tmp);
4357 }
4358}
4359
4360/**
4361 * cik_irq_disable - disable interrupts
4362 *
4363 * @rdev: radeon_device pointer
4364 *
4365 * Disable interrupts on the hw (CIK).
4366 */
4367static void cik_irq_disable(struct radeon_device *rdev)
4368{
4369 cik_disable_interrupts(rdev);
4370 /* Wait and acknowledge irq */
4371 mdelay(1);
4372 cik_irq_ack(rdev);
4373 cik_disable_interrupt_state(rdev);
4374}
4375
4376/**
4377 * cik_irq_suspend - disable interrupts for suspend
4378 *
4379 * @rdev: radeon_device pointer
4380 *
4381 * Disable interrupts and stop the RLC (CIK).
4382 * Used for suspend.
4383 */
4384static void cik_irq_suspend(struct radeon_device *rdev)
4385{
4386 cik_irq_disable(rdev);
4387 cik_rlc_stop(rdev);
4388}
4389
4390/**
4391 * cik_irq_fini - tear down interrupt support
4392 *
4393 * @rdev: radeon_device pointer
4394 *
4395 * Disable interrupts on the hw and free the IH ring
4396 * buffer (CIK).
4397 * Used for driver unload.
4398 */
4399static void cik_irq_fini(struct radeon_device *rdev)
4400{
4401 cik_irq_suspend(rdev);
4402 r600_ih_ring_fini(rdev);
4403}
4404
4405/**
4406 * cik_get_ih_wptr - get the IH ring buffer wptr
4407 *
4408 * @rdev: radeon_device pointer
4409 *
4410 * Get the IH ring buffer wptr from either the register
4411 * or the writeback memory buffer (CIK). Also check for
4412 * ring buffer overflow and deal with it.
4413 * Used by cik_irq_process().
4414 * Returns the value of the wptr.
4415 */
4416static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
4417{
4418 u32 wptr, tmp;
4419
4420 if (rdev->wb.enabled)
4421 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4422 else
4423 wptr = RREG32(IH_RB_WPTR);
4424
4425 if (wptr & RB_OVERFLOW) {
4426	/* When a ring buffer overflow happens, start parsing interrupts
4427	 * from the last vector that was not overwritten (wptr + 16).
4428	 * Hopefully this should allow us to catch up.
4429 */
4430 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4431		 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
4432 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4433 tmp = RREG32(IH_RB_CNTL);
4434 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4435 WREG32(IH_RB_CNTL, tmp);
4436 }
4437 return (wptr & rdev->ih.ptr_mask);
4438}
4439
4440/* CIK IV Ring
4441 * Each IV ring entry is 128 bits:
4442 * [7:0] - interrupt source id
4443 * [31:8] - reserved
4444 * [59:32] - interrupt source data
4445 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04004446 * [71:64] - RINGID
4447 * CP:
4448 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05004449 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
4450 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
4451 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
4452 * PIPE_ID - ME0 0=3D
4453 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04004454 * SDMA:
4455 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
4456 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
4457 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05004458 * [79:72] - VMID
4459 * [95:80] - PASID
4460 * [127:96] - reserved
4461 */
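/* Decode sketch for a single IV ring entry following the layout above
 * (assumption: the struct and helper names are illustrative only;
 * cik_irq_process() below extracts the same fields inline).  Each entry
 * is four little-endian dwords.
 */
struct cik_iv_entry {
	u8 src_id;	/* dw0, bits [7:0] */
	u32 src_data;	/* dw1, bits [27:0] */
	u8 ring_id;	/* dw2, bits [7:0] */
	u8 vmid;	/* dw2, bits [15:8] */
	u16 pasid;	/* dw2, bits [31:16] */
};

static inline void cik_decode_iv_entry(const __le32 *dw, struct cik_iv_entry *iv)
{
	iv->src_id = le32_to_cpu(dw[0]) & 0xff;
	iv->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	iv->ring_id = le32_to_cpu(dw[2]) & 0xff;
	iv->vmid = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	iv->pasid = (le32_to_cpu(dw[2]) >> 16) & 0xffff;
}
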
4462/**
4463 * cik_irq_process - interrupt handler
4464 *
4465 * @rdev: radeon_device pointer
4466 *
4467 * Interrupt handler (CIK). Walk the IH ring,
4468 * ack interrupts and schedule work to handle
4469 * interrupt events.
4470 * Returns irq process return code.
4471 */
4472int cik_irq_process(struct radeon_device *rdev)
4473{
4474 u32 wptr;
4475 u32 rptr;
4476 u32 src_id, src_data, ring_id;
4477 u8 me_id, pipe_id, queue_id;
4478 u32 ring_index;
4479 bool queue_hotplug = false;
4480 bool queue_reset = false;
4481
4482 if (!rdev->ih.enabled || rdev->shutdown)
4483 return IRQ_NONE;
4484
4485 wptr = cik_get_ih_wptr(rdev);
4486
4487restart_ih:
4488 /* is somebody else already processing irqs? */
4489 if (atomic_xchg(&rdev->ih.lock, 1))
4490 return IRQ_NONE;
4491
4492 rptr = rdev->ih.rptr;
4493 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4494
4495 /* Order reading of wptr vs. reading of IH ring data */
4496 rmb();
4497
4498 /* display interrupts */
4499 cik_irq_ack(rdev);
4500
4501 while (rptr != wptr) {
4502 /* wptr/rptr are in bytes! */
4503 ring_index = rptr / 4;
4504 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4505 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4506 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05004507
4508 switch (src_id) {
4509 case 1: /* D1 vblank/vline */
4510 switch (src_data) {
4511 case 0: /* D1 vblank */
4512 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
4513 if (rdev->irq.crtc_vblank_int[0]) {
4514 drm_handle_vblank(rdev->ddev, 0);
4515 rdev->pm.vblank_sync = true;
4516 wake_up(&rdev->irq.vblank_queue);
4517 }
4518 if (atomic_read(&rdev->irq.pflip[0]))
4519 radeon_crtc_handle_flip(rdev, 0);
4520 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4521 DRM_DEBUG("IH: D1 vblank\n");
4522 }
4523 break;
4524 case 1: /* D1 vline */
4525 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
4526 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4527 DRM_DEBUG("IH: D1 vline\n");
4528 }
4529 break;
4530 default:
4531 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4532 break;
4533 }
4534 break;
4535 case 2: /* D2 vblank/vline */
4536 switch (src_data) {
4537 case 0: /* D2 vblank */
4538 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4539 if (rdev->irq.crtc_vblank_int[1]) {
4540 drm_handle_vblank(rdev->ddev, 1);
4541 rdev->pm.vblank_sync = true;
4542 wake_up(&rdev->irq.vblank_queue);
4543 }
4544 if (atomic_read(&rdev->irq.pflip[1]))
4545 radeon_crtc_handle_flip(rdev, 1);
4546 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4547 DRM_DEBUG("IH: D2 vblank\n");
4548 }
4549 break;
4550 case 1: /* D2 vline */
4551 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4552 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4553 DRM_DEBUG("IH: D2 vline\n");
4554 }
4555 break;
4556 default:
4557 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4558 break;
4559 }
4560 break;
4561 case 3: /* D3 vblank/vline */
4562 switch (src_data) {
4563 case 0: /* D3 vblank */
4564 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4565 if (rdev->irq.crtc_vblank_int[2]) {
4566 drm_handle_vblank(rdev->ddev, 2);
4567 rdev->pm.vblank_sync = true;
4568 wake_up(&rdev->irq.vblank_queue);
4569 }
4570 if (atomic_read(&rdev->irq.pflip[2]))
4571 radeon_crtc_handle_flip(rdev, 2);
4572 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4573 DRM_DEBUG("IH: D3 vblank\n");
4574 }
4575 break;
4576 case 1: /* D3 vline */
4577 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4578 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4579 DRM_DEBUG("IH: D3 vline\n");
4580 }
4581 break;
4582 default:
4583 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4584 break;
4585 }
4586 break;
4587 case 4: /* D4 vblank/vline */
4588 switch (src_data) {
4589 case 0: /* D4 vblank */
4590 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4591 if (rdev->irq.crtc_vblank_int[3]) {
4592 drm_handle_vblank(rdev->ddev, 3);
4593 rdev->pm.vblank_sync = true;
4594 wake_up(&rdev->irq.vblank_queue);
4595 }
4596 if (atomic_read(&rdev->irq.pflip[3]))
4597 radeon_crtc_handle_flip(rdev, 3);
4598 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4599 DRM_DEBUG("IH: D4 vblank\n");
4600 }
4601 break;
4602 case 1: /* D4 vline */
4603 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4604 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4605 DRM_DEBUG("IH: D4 vline\n");
4606 }
4607 break;
4608 default:
4609 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4610 break;
4611 }
4612 break;
4613 case 5: /* D5 vblank/vline */
4614 switch (src_data) {
4615 case 0: /* D5 vblank */
4616 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4617 if (rdev->irq.crtc_vblank_int[4]) {
4618 drm_handle_vblank(rdev->ddev, 4);
4619 rdev->pm.vblank_sync = true;
4620 wake_up(&rdev->irq.vblank_queue);
4621 }
4622 if (atomic_read(&rdev->irq.pflip[4]))
4623 radeon_crtc_handle_flip(rdev, 4);
4624 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4625 DRM_DEBUG("IH: D5 vblank\n");
4626 }
4627 break;
4628 case 1: /* D5 vline */
4629 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4630 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4631 DRM_DEBUG("IH: D5 vline\n");
4632 }
4633 break;
4634 default:
4635 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4636 break;
4637 }
4638 break;
4639 case 6: /* D6 vblank/vline */
4640 switch (src_data) {
4641 case 0: /* D6 vblank */
4642 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4643 if (rdev->irq.crtc_vblank_int[5]) {
4644 drm_handle_vblank(rdev->ddev, 5);
4645 rdev->pm.vblank_sync = true;
4646 wake_up(&rdev->irq.vblank_queue);
4647 }
4648 if (atomic_read(&rdev->irq.pflip[5]))
4649 radeon_crtc_handle_flip(rdev, 5);
4650 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4651 DRM_DEBUG("IH: D6 vblank\n");
4652 }
4653 break;
4654 case 1: /* D6 vline */
4655 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4656 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4657 DRM_DEBUG("IH: D6 vline\n");
4658 }
4659 break;
4660 default:
4661 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4662 break;
4663 }
4664 break;
4665 case 42: /* HPD hotplug */
4666 switch (src_data) {
4667 case 0:
4668 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4669 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
4670 queue_hotplug = true;
4671 DRM_DEBUG("IH: HPD1\n");
4672 }
4673 break;
4674 case 1:
4675 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4676 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4677 queue_hotplug = true;
4678 DRM_DEBUG("IH: HPD2\n");
4679 }
4680 break;
4681 case 2:
4682 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4683 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4684 queue_hotplug = true;
4685 DRM_DEBUG("IH: HPD3\n");
4686 }
4687 break;
4688 case 3:
4689 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4690 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4691 queue_hotplug = true;
4692 DRM_DEBUG("IH: HPD4\n");
4693 }
4694 break;
4695 case 4:
4696 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4697 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4698 queue_hotplug = true;
4699 DRM_DEBUG("IH: HPD5\n");
4700 }
4701 break;
4702 case 5:
4703 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4704 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4705 queue_hotplug = true;
4706 DRM_DEBUG("IH: HPD6\n");
4707 }
4708 break;
4709 default:
4710 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4711 break;
4712 }
4713 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04004714 case 146:
4715 case 147:
4716 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4717 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4718 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4719 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4720 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4721 /* reset addr and status */
4722 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4723 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05004724 case 176: /* GFX RB CP_INT */
4725 case 177: /* GFX IB CP_INT */
4726 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4727 break;
4728 case 181: /* CP EOP event */
4729 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04004730 /* XXX check the bitfield order! */
4731 me_id = (ring_id & 0x60) >> 5;
4732 pipe_id = (ring_id & 0x18) >> 3;
4733 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05004734 switch (me_id) {
4735 case 0:
4736 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4737 break;
4738 case 1:
4739 /* XXX compute */
4740 break;
4741 case 2:
4742 /* XXX compute */
4743 break;
4744 }
4745 break;
4746 case 184: /* CP Privileged reg access */
4747 DRM_ERROR("Illegal register access in command stream\n");
4748 /* XXX check the bitfield order! */
4749 me_id = (ring_id & 0x60) >> 5;
4750 pipe_id = (ring_id & 0x18) >> 3;
4751 queue_id = (ring_id & 0x7) >> 0;
4752 switch (me_id) {
4753 case 0:
4754 /* This results in a full GPU reset, but all we need to do is soft
4755 * reset the CP for gfx
4756 */
4757 queue_reset = true;
4758 break;
4759 case 1:
4760 /* XXX compute */
4761 break;
4762 case 2:
4763 /* XXX compute */
4764 break;
4765 }
4766 break;
4767 case 185: /* CP Privileged inst */
4768 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04004769 /* XXX check the bitfield order! */
4770 me_id = (ring_id & 0x60) >> 5;
4771 pipe_id = (ring_id & 0x18) >> 3;
4772 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05004773 switch (me_id) {
4774 case 0:
4775 /* This results in a full GPU reset, but all we need to do is soft
4776 * reset the CP for gfx
4777 */
4778 queue_reset = true;
4779 break;
4780 case 1:
4781 /* XXX compute */
4782 break;
4783 case 2:
4784 /* XXX compute */
4785 break;
4786 }
4787 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04004788 case 224: /* SDMA trap event */
4789 /* XXX check the bitfield order! */
4790 me_id = (ring_id & 0x3) >> 0;
4791 queue_id = (ring_id & 0xc) >> 2;
4792 DRM_DEBUG("IH: SDMA trap\n");
4793 switch (me_id) {
4794 case 0:
4795 switch (queue_id) {
4796 case 0:
4797 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4798 break;
4799 case 1:
4800 /* XXX compute */
4801 break;
4802 case 2:
4803 /* XXX compute */
4804 break;
4805 }
4806 break;
4807 case 1:
4808 switch (queue_id) {
4809 case 0:
4810 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4811 break;
4812 case 1:
4813 /* XXX compute */
4814 break;
4815 case 2:
4816 /* XXX compute */
4817 break;
4818 }
4819 break;
4820 }
4821 break;
4822 case 241: /* SDMA Privileged inst */
4823 case 247: /* SDMA Privileged inst */
4824 DRM_ERROR("Illegal instruction in SDMA command stream\n");
4825 /* XXX check the bitfield order! */
4826 me_id = (ring_id & 0x3) >> 0;
4827 queue_id = (ring_id & 0xc) >> 2;
4828 switch (me_id) {
4829 case 0:
4830 switch (queue_id) {
4831 case 0:
4832 queue_reset = true;
4833 break;
4834 case 1:
4835 /* XXX compute */
4836 queue_reset = true;
4837 break;
4838 case 2:
4839 /* XXX compute */
4840 queue_reset = true;
4841 break;
4842 }
4843 break;
4844 case 1:
4845 switch (queue_id) {
4846 case 0:
4847 queue_reset = true;
4848 break;
4849 case 1:
4850 /* XXX compute */
4851 queue_reset = true;
4852 break;
4853 case 2:
4854 /* XXX compute */
4855 queue_reset = true;
4856 break;
4857 }
4858 break;
4859 }
4860 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05004861 case 233: /* GUI IDLE */
4862 DRM_DEBUG("IH: GUI idle\n");
4863 break;
4864 default:
4865 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4866 break;
4867 }
4868
4869 /* wptr/rptr are in bytes! */
4870 rptr += 16;
4871 rptr &= rdev->ih.ptr_mask;
4872 }
4873 if (queue_hotplug)
4874 schedule_work(&rdev->hotplug_work);
4875 if (queue_reset)
4876 schedule_work(&rdev->reset_work);
4877 rdev->ih.rptr = rptr;
4878 WREG32(IH_RB_RPTR, rdev->ih.rptr);
4879 atomic_set(&rdev->ih.lock, 0);
4880
4881 /* make sure wptr hasn't changed while processing */
4882 wptr = cik_get_ih_wptr(rdev);
4883 if (wptr != rptr)
4884 goto restart_ih;
4885
4886 return IRQ_HANDLED;
4887}
Alex Deucher7bf94a22012-08-17 11:48:29 -04004888
4889/*
4890 * startup/shutdown callbacks
4891 */
4892/**
4893 * cik_startup - program the asic to a functional state
4894 *
4895 * @rdev: radeon_device pointer
4896 *
4897 * Programs the asic to a functional state (CIK).
4898 * Called by cik_init() and cik_resume().
4899 * Returns 0 for success, error for failure.
4900 */
4901static int cik_startup(struct radeon_device *rdev)
4902{
4903 struct radeon_ring *ring;
4904 int r;
4905
4906 if (rdev->flags & RADEON_IS_IGP) {
4907 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4908 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
4909 r = cik_init_microcode(rdev);
4910 if (r) {
4911 DRM_ERROR("Failed to load firmware!\n");
4912 return r;
4913 }
4914 }
4915 } else {
4916 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4917 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
4918 !rdev->mc_fw) {
4919 r = cik_init_microcode(rdev);
4920 if (r) {
4921 DRM_ERROR("Failed to load firmware!\n");
4922 return r;
4923 }
4924 }
4925
4926 r = ci_mc_load_microcode(rdev);
4927 if (r) {
4928 DRM_ERROR("Failed to load MC firmware!\n");
4929 return r;
4930 }
4931 }
4932
4933 r = r600_vram_scratch_init(rdev);
4934 if (r)
4935 return r;
4936
4937 cik_mc_program(rdev);
4938 r = cik_pcie_gart_enable(rdev);
4939 if (r)
4940 return r;
4941 cik_gpu_init(rdev);
4942
4943 /* allocate rlc buffers */
4944 r = si_rlc_init(rdev);
4945 if (r) {
4946 DRM_ERROR("Failed to init rlc BOs!\n");
4947 return r;
4948 }
4949
4950 /* allocate wb buffer */
4951 r = radeon_wb_init(rdev);
4952 if (r)
4953 return r;
4954
4955 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4956 if (r) {
4957 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4958 return r;
4959 }
4960
4961 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4962 if (r) {
4963 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4964 return r;
4965 }
4966
4967 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4968 if (r) {
4969 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4970 return r;
4971 }
4972
Christian König87167bb2013-04-09 13:39:21 -04004973 r = cik_uvd_resume(rdev);
4974 if (!r) {
4975 r = radeon_fence_driver_start_ring(rdev,
4976 R600_RING_TYPE_UVD_INDEX);
4977 if (r)
4978 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
4979 }
4980 if (r)
4981 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
4982
Alex Deucher7bf94a22012-08-17 11:48:29 -04004983 /* Enable IRQ */
4984 if (!rdev->irq.installed) {
4985 r = radeon_irq_kms_init(rdev);
4986 if (r)
4987 return r;
4988 }
4989
4990 r = cik_irq_init(rdev);
4991 if (r) {
4992 DRM_ERROR("radeon: IH init failed (%d).\n", r);
4993 radeon_irq_kms_fini(rdev);
4994 return r;
4995 }
4996 cik_irq_set(rdev);
4997
4998 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4999 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
5000 CP_RB0_RPTR, CP_RB0_WPTR,
5001 0, 0xfffff, RADEON_CP_PACKET2);
5002 if (r)
5003 return r;
5004
5005 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5006 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
5007 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
5008 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
5009 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
5010 if (r)
5011 return r;
5012
5013 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5014 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
5015 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
5016 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
5017 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
5018 if (r)
5019 return r;
5020
5021 r = cik_cp_resume(rdev);
5022 if (r)
5023 return r;
5024
5025 r = cik_sdma_resume(rdev);
5026 if (r)
5027 return r;
5028
Christian König87167bb2013-04-09 13:39:21 -04005029 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5030 if (ring->ring_size) {
5031 r = radeon_ring_init(rdev, ring, ring->ring_size,
5032 R600_WB_UVD_RPTR_OFFSET,
5033 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
5034 0, 0xfffff, RADEON_CP_PACKET2);
5035 if (!r)
5036 r = r600_uvd_init(rdev);
5037 if (r)
5038 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5039 }
5040
Alex Deucher7bf94a22012-08-17 11:48:29 -04005041 r = radeon_ib_pool_init(rdev);
5042 if (r) {
5043 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5044 return r;
5045 }
5046
5047 r = radeon_vm_manager_init(rdev);
5048 if (r) {
5049 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5050 return r;
5051 }
5052
5053 return 0;
5054}
5055
5056/**
5057 * cik_resume - resume the asic to a functional state
5058 *
5059 * @rdev: radeon_device pointer
5060 *
5061 * Programs the asic to a functional state (CIK).
5062 * Called at resume.
5063 * Returns 0 for success, error for failure.
5064 */
5065int cik_resume(struct radeon_device *rdev)
5066{
5067 int r;
5068
5069 /* post card */
5070 atom_asic_init(rdev->mode_info.atom_context);
5071
5072 rdev->accel_working = true;
5073 r = cik_startup(rdev);
5074 if (r) {
5075 DRM_ERROR("cik startup failed on resume\n");
5076 rdev->accel_working = false;
5077 return r;
5078 }
5079
5080 return r;
5081
5082}
5083
5084/**
5085 * cik_suspend - suspend the asic
5086 *
5087 * @rdev: radeon_device pointer
5088 *
5089 * Bring the chip into a state suitable for suspend (CIK).
5090 * Called at suspend.
5091 * Returns 0 for success.
5092 */
5093int cik_suspend(struct radeon_device *rdev)
5094{
5095 radeon_vm_manager_fini(rdev);
5096 cik_cp_enable(rdev, false);
5097 cik_sdma_enable(rdev, false);
Christian König87167bb2013-04-09 13:39:21 -04005098 r600_uvd_rbc_stop(rdev);
5099 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005100 cik_irq_suspend(rdev);
5101 radeon_wb_disable(rdev);
5102 cik_pcie_gart_disable(rdev);
5103 return 0;
5104}
5105
5106/* Plan is to move initialization into this function and use
5107 * helper functions so that radeon_device_init pretty much
5108 * does nothing more than call asic specific functions. This
5109 * should also allow removing a bunch of callback functions
5110 * like vram_info.
5111 */
5112/**
5113 * cik_init - asic specific driver and hw init
5114 *
5115 * @rdev: radeon_device pointer
5116 *
5117 * Setup asic specific driver variables and program the hw
5118 * to a functional state (CIK).
5119 * Called at driver startup.
5120 * Returns 0 for success, errors for failure.
5121 */
5122int cik_init(struct radeon_device *rdev)
5123{
5124 struct radeon_ring *ring;
5125 int r;
5126
5127 /* Read BIOS */
5128 if (!radeon_get_bios(rdev)) {
5129 if (ASIC_IS_AVIVO(rdev))
5130 return -EINVAL;
5131 }
5132 /* Must be an ATOMBIOS */
5133 if (!rdev->is_atom_bios) {
5134		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
5135 return -EINVAL;
5136 }
5137 r = radeon_atombios_init(rdev);
5138 if (r)
5139 return r;
5140
5141 /* Post card if necessary */
5142 if (!radeon_card_posted(rdev)) {
5143 if (!rdev->bios) {
5144 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5145 return -EINVAL;
5146 }
5147 DRM_INFO("GPU not posted. posting now...\n");
5148 atom_asic_init(rdev->mode_info.atom_context);
5149 }
5150 /* Initialize scratch registers */
5151 cik_scratch_init(rdev);
5152 /* Initialize surface registers */
5153 radeon_surface_init(rdev);
5154 /* Initialize clocks */
5155 radeon_get_clock_info(rdev->ddev);
5156
5157 /* Fence driver */
5158 r = radeon_fence_driver_init(rdev);
5159 if (r)
5160 return r;
5161
5162 /* initialize memory controller */
5163 r = cik_mc_init(rdev);
5164 if (r)
5165 return r;
5166 /* Memory manager */
5167 r = radeon_bo_init(rdev);
5168 if (r)
5169 return r;
5170
5171 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5172 ring->ring_obj = NULL;
5173 r600_ring_init(rdev, ring, 1024 * 1024);
5174
5175 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5176 ring->ring_obj = NULL;
5177 r600_ring_init(rdev, ring, 256 * 1024);
5178
5179 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5180 ring->ring_obj = NULL;
5181 r600_ring_init(rdev, ring, 256 * 1024);
5182
Christian König87167bb2013-04-09 13:39:21 -04005183 r = radeon_uvd_init(rdev);
5184 if (!r) {
5185 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5186 ring->ring_obj = NULL;
5187 r600_ring_init(rdev, ring, 4096);
5188 }
5189
Alex Deucher7bf94a22012-08-17 11:48:29 -04005190 rdev->ih.ring_obj = NULL;
5191 r600_ih_ring_init(rdev, 64 * 1024);
5192
5193 r = r600_pcie_gart_init(rdev);
5194 if (r)
5195 return r;
5196
5197 rdev->accel_working = true;
5198 r = cik_startup(rdev);
5199 if (r) {
5200 dev_err(rdev->dev, "disabling GPU acceleration\n");
5201 cik_cp_fini(rdev);
5202 cik_sdma_fini(rdev);
5203 cik_irq_fini(rdev);
5204 si_rlc_fini(rdev);
5205 radeon_wb_fini(rdev);
5206 radeon_ib_pool_fini(rdev);
5207 radeon_vm_manager_fini(rdev);
5208 radeon_irq_kms_fini(rdev);
5209 cik_pcie_gart_fini(rdev);
5210 rdev->accel_working = false;
5211 }
5212
5213 /* Don't start up if the MC ucode is missing.
5214 * The default clocks and voltages before the MC ucode
5215	 * is loaded are not sufficient for advanced operations.
5216 */
5217 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
5218 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5219 return -EINVAL;
5220 }
5221
5222 return 0;
5223}
5224
5225/**
5226 * cik_fini - asic specific driver and hw fini
5227 *
5228 * @rdev: radeon_device pointer
5229 *
5230 * Tear down the asic specific driver variables and program the hw
5231 * to an idle state (CIK).
5232 * Called at driver unload.
5233 */
5234void cik_fini(struct radeon_device *rdev)
5235{
5236 cik_cp_fini(rdev);
5237 cik_sdma_fini(rdev);
5238 cik_irq_fini(rdev);
5239 si_rlc_fini(rdev);
5240 radeon_wb_fini(rdev);
5241 radeon_vm_manager_fini(rdev);
5242 radeon_ib_pool_fini(rdev);
5243 radeon_irq_kms_fini(rdev);
Christian König87167bb2013-04-09 13:39:21 -04005244 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04005245 cik_pcie_gart_fini(rdev);
5246 r600_vram_scratch_fini(rdev);
5247 radeon_gem_fini(rdev);
5248 radeon_fence_driver_fini(rdev);
5249 radeon_bo_fini(rdev);
5250 radeon_atombios_fini(rdev);
5251 kfree(rdev->bios);
5252 rdev->bios = NULL;
5253}
Alex Deuchercd84a272012-07-20 17:13:13 -04005254
5255/* display watermark setup */
5256/**
5257 * dce8_line_buffer_adjust - Set up the line buffer
5258 *
5259 * @rdev: radeon_device pointer
5260 * @radeon_crtc: the selected display controller
5261 * @mode: the current display mode on the selected display
5262 * controller
5263 *
5264 * Set up the line buffer allocation for
5265 * the selected display controller (CIK).
5266 * Returns the line buffer size in pixels.
5267 */
5268static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5269 struct radeon_crtc *radeon_crtc,
5270 struct drm_display_mode *mode)
5271{
5272 u32 tmp;
5273
5274 /*
5275 * Line Buffer Setup
5276	 * There are 6 line buffers, one for each display controller.
5277 * There are 3 partitions per LB. Select the number of partitions
5278 * to enable based on the display width. For display widths larger
5279	 * than 4096, you need to use 2 display controllers and combine
5280 * them using the stereo blender.
5281 */
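	/*
	 * The value chosen here selects LB_MEMORY_CONFIG and corresponds to
	 * the usable width returned at the end of this function:
	 * 0 -> 4096 pixels, 1 -> 1920 pixels, 2 -> 2560 pixels.  The return
	 * value is doubled, presumably covering two scanlines worth of data.
	 */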
5282 if (radeon_crtc->base.enabled && mode) {
5283 if (mode->crtc_hdisplay < 1920)
5284 tmp = 1;
5285 else if (mode->crtc_hdisplay < 2560)
5286 tmp = 2;
5287 else if (mode->crtc_hdisplay < 4096)
5288 tmp = 0;
5289 else {
5290 DRM_DEBUG_KMS("Mode too big for LB!\n");
5291 tmp = 0;
5292 }
5293 } else
5294 tmp = 1;
5295
5296 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5297 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5298
5299 if (radeon_crtc->base.enabled && mode) {
5300 switch (tmp) {
5301 case 0:
5302 default:
5303 return 4096 * 2;
5304 case 1:
5305 return 1920 * 2;
5306 case 2:
5307 return 2560 * 2;
5308 }
5309 }
5310
5311 /* controller not enabled, so no lb used */
5312 return 0;
5313}
5314
5315/**
5316 * cik_get_number_of_dram_channels - get the number of dram channels
5317 *
5318 * @rdev: radeon_device pointer
5319 *
5320 * Look up the number of video ram channels (CIK).
5321 * Used for display watermark bandwidth calculations
5322 * Returns the number of dram channels
5323 */
5324static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5325{
5326 u32 tmp = RREG32(MC_SHARED_CHMAP);
5327
5328 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5329 case 0:
5330 default:
5331 return 1;
5332 case 1:
5333 return 2;
5334 case 2:
5335 return 4;
5336 case 3:
5337 return 8;
5338 case 4:
5339 return 3;
5340 case 5:
5341 return 6;
5342 case 6:
5343 return 10;
5344 case 7:
5345 return 12;
5346 case 8:
5347 return 16;
5348 }
5349}
5350
5351struct dce8_wm_params {
5352 u32 dram_channels; /* number of dram channels */
5353 u32 yclk; /* bandwidth per dram data pin in kHz */
5354 u32 sclk; /* engine clock in kHz */
5355 u32 disp_clk; /* display clock in kHz */
5356 u32 src_width; /* viewport width */
5357 u32 active_time; /* active display time in ns */
5358 u32 blank_time; /* blank time in ns */
5359 bool interlaced; /* mode is interlaced */
5360 fixed20_12 vsc; /* vertical scale ratio */
5361 u32 num_heads; /* number of active crtcs */
5362 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
5363 u32 lb_size; /* line buffer allocated to pipe */
5364 u32 vtaps; /* vertical scaler taps */
5365};
5366
5367/**
5368 * dce8_dram_bandwidth - get the dram bandwidth
5369 *
5370 * @wm: watermark calculation data
5371 *
5372 * Calculate the raw dram bandwidth (CIK).
5373 * Used for display watermark bandwidth calculations
5374 * Returns the dram bandwidth in MBytes/s
5375 */
5376static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
5377{
5378 /* Calculate raw DRAM Bandwidth */
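	/*
	 * Effectively: bandwidth (MB/s) ~= dram_channels * 4 * (yclk / 1000) * 0.7,
	 * where the * 4 appears to be bytes per channel per memory clock.
	 * E.g. 2 channels at yclk = 1000000 kHz: 2 * 4 * 1000 * 0.7 = 5600 MB/s.
	 */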
5379 fixed20_12 dram_efficiency; /* 0.7 */
5380 fixed20_12 yclk, dram_channels, bandwidth;
5381 fixed20_12 a;
5382
5383 a.full = dfixed_const(1000);
5384 yclk.full = dfixed_const(wm->yclk);
5385 yclk.full = dfixed_div(yclk, a);
5386 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5387 a.full = dfixed_const(10);
5388 dram_efficiency.full = dfixed_const(7);
5389 dram_efficiency.full = dfixed_div(dram_efficiency, a);
5390 bandwidth.full = dfixed_mul(dram_channels, yclk);
5391 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
5392
5393 return dfixed_trunc(bandwidth);
5394}
5395
5396/**
5397 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
5398 *
5399 * @wm: watermark calculation data
5400 *
5401 * Calculate the dram bandwidth used for display (CIK).
5402 * Used for display watermark bandwidth calculations
5403 * Returns the dram bandwidth for display in MBytes/s
5404 */
5405static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5406{
5407 /* Calculate DRAM Bandwidth and the part allocated to display. */
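	/*
	 * Same raw bandwidth formula as dce8_dram_bandwidth(), but scaled by
	 * the worst case display allocation of 0.3 instead of the 0.7
	 * efficiency: bandwidth (MB/s) ~= dram_channels * 4 * (yclk / 1000) * 0.3.
	 */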
5408 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
5409 fixed20_12 yclk, dram_channels, bandwidth;
5410 fixed20_12 a;
5411
5412 a.full = dfixed_const(1000);
5413 yclk.full = dfixed_const(wm->yclk);
5414 yclk.full = dfixed_div(yclk, a);
5415 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5416 a.full = dfixed_const(10);
5417	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
5418 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
5419 bandwidth.full = dfixed_mul(dram_channels, yclk);
5420 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
5421
5422 return dfixed_trunc(bandwidth);
5423}
5424
5425/**
5426 * dce8_data_return_bandwidth - get the data return bandwidth
5427 *
5428 * @wm: watermark calculation data
5429 *
5430 * Calculate the data return bandwidth used for display (CIK).
5431 * Used for display watermark bandwidth calculations
5432 * Returns the data return bandwidth in MBytes/s
5433 */
5434static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
5435{
5436 /* Calculate the display Data return Bandwidth */
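	/*
	 * The return path is modelled as 32 bytes per sclk cycle at 0.8
	 * efficiency: bandwidth (MB/s) ~= 32 * (sclk / 1000) * 0.8.
	 */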
5437 fixed20_12 return_efficiency; /* 0.8 */
5438 fixed20_12 sclk, bandwidth;
5439 fixed20_12 a;
5440
5441 a.full = dfixed_const(1000);
5442 sclk.full = dfixed_const(wm->sclk);
5443 sclk.full = dfixed_div(sclk, a);
5444 a.full = dfixed_const(10);
5445 return_efficiency.full = dfixed_const(8);
5446 return_efficiency.full = dfixed_div(return_efficiency, a);
5447 a.full = dfixed_const(32);
5448 bandwidth.full = dfixed_mul(a, sclk);
5449 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
5450
5451 return dfixed_trunc(bandwidth);
5452}
5453
5454/**
5455 * dce8_dmif_request_bandwidth - get the dmif bandwidth
5456 *
5457 * @wm: watermark calculation data
5458 *
5459 * Calculate the dmif bandwidth used for display (CIK).
5460 * Used for display watermark bandwidth calculations
5461 * Returns the dmif bandwidth in MBytes/s
5462 */
5463static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
5464{
5465 /* Calculate the DMIF Request Bandwidth */
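	/*
	 * Same model as the data return path, but clocked by the display
	 * clock: bandwidth (MB/s) ~= 32 * (disp_clk / 1000) * 0.8.
	 */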
5466 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
5467 fixed20_12 disp_clk, bandwidth;
5468 fixed20_12 a, b;
5469
5470 a.full = dfixed_const(1000);
5471 disp_clk.full = dfixed_const(wm->disp_clk);
5472 disp_clk.full = dfixed_div(disp_clk, a);
5473 a.full = dfixed_const(32);
5474 b.full = dfixed_mul(a, disp_clk);
5475
5476 a.full = dfixed_const(10);
5477 disp_clk_request_efficiency.full = dfixed_const(8);
5478 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
5479
5480 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
5481
5482 return dfixed_trunc(bandwidth);
5483}
5484
5485/**
5486 * dce8_available_bandwidth - get the min available bandwidth
5487 *
5488 * @wm: watermark calculation data
5489 *
5490 * Calculate the min available bandwidth used for display (CIK).
5491 * Used for display watermark bandwidth calculations
5492 * Returns the min available bandwidth in MBytes/s
5493 */
5494static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
5495{
5496	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
5497 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
5498 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
5499 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
5500
5501 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
5502}
5503
5504/**
5505 * dce8_average_bandwidth - get the average available bandwidth
5506 *
5507 * @wm: watermark calculation data
5508 *
5509 * Calculate the average available bandwidth used for display (CIK).
5510 * Used for display watermark bandwidth calculations
5511 * Returns the average available bandwidth in MBytes/s
5512 */
5513static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
5514{
5515 /* Calculate the display mode Average Bandwidth
5516 * DisplayMode should contain the source and destination dimensions,
5517 * timing, etc.
5518 */
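	/*
	 * bandwidth (MB/s) ~= src_width * bytes_per_pixel * vsc / line_time,
	 * with line_time converted from ns to us by the divide by 1000.
	 * E.g. 1920 px * 4 B over a ~14.8 us line (1080p60) is roughly 519 MB/s.
	 */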
5519 fixed20_12 bpp;
5520 fixed20_12 line_time;
5521 fixed20_12 src_width;
5522 fixed20_12 bandwidth;
5523 fixed20_12 a;
5524
5525 a.full = dfixed_const(1000);
5526 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
5527 line_time.full = dfixed_div(line_time, a);
5528 bpp.full = dfixed_const(wm->bytes_per_pixel);
5529 src_width.full = dfixed_const(wm->src_width);
5530 bandwidth.full = dfixed_mul(src_width, bpp);
5531 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
5532 bandwidth.full = dfixed_div(bandwidth, line_time);
5533
5534 return dfixed_trunc(bandwidth);
5535}
5536
5537/**
5538 * dce8_latency_watermark - get the latency watermark
5539 *
5540 * @wm: watermark calculation data
5541 *
5542 * Calculate the latency watermark (CIK).
5543 * Used for display watermark bandwidth calculations
5544 * Returns the latency watermark in ns
5545 */
5546static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
5547{
5548 /* First calculate the latency in ns */
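	/*
	 * Rough model: latency = fixed 2000 ns MC latency + the time for the
	 * other heads' outstanding chunk and cursor requests to return + the
	 * display pipe latency.  If the line buffer cannot be refilled within
	 * the active period, the shortfall is added on top.
	 */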
5549 u32 mc_latency = 2000; /* 2000 ns. */
5550 u32 available_bandwidth = dce8_available_bandwidth(wm);
5551 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
5552 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
5553 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
5554 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
5555 (wm->num_heads * cursor_line_pair_return_time);
5556 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
5557 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
5558 u32 tmp, dmif_size = 12288;
5559 fixed20_12 a, b, c;
5560
5561 if (wm->num_heads == 0)
5562 return 0;
5563
5564 a.full = dfixed_const(2);
5565 b.full = dfixed_const(1);
5566 if ((wm->vsc.full > a.full) ||
5567 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
5568 (wm->vtaps >= 5) ||
5569 ((wm->vsc.full >= a.full) && wm->interlaced))
5570 max_src_lines_per_dst_line = 4;
5571 else
5572 max_src_lines_per_dst_line = 2;
5573
5574 a.full = dfixed_const(available_bandwidth);
5575 b.full = dfixed_const(wm->num_heads);
5576 a.full = dfixed_div(a, b);
5577
5578 b.full = dfixed_const(mc_latency + 512);
5579 c.full = dfixed_const(wm->disp_clk);
5580 b.full = dfixed_div(b, c);
5581
5582 c.full = dfixed_const(dmif_size);
5583 b.full = dfixed_div(c, b);
5584
5585 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
5586
5587 b.full = dfixed_const(1000);
5588 c.full = dfixed_const(wm->disp_clk);
5589 b.full = dfixed_div(c, b);
5590 c.full = dfixed_const(wm->bytes_per_pixel);
5591 b.full = dfixed_mul(b, c);
5592
5593 lb_fill_bw = min(tmp, dfixed_trunc(b));
5594
5595 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
5596 b.full = dfixed_const(1000);
5597 c.full = dfixed_const(lb_fill_bw);
5598 b.full = dfixed_div(c, b);
5599 a.full = dfixed_div(a, b);
5600 line_fill_time = dfixed_trunc(a);
5601
5602 if (line_fill_time < wm->active_time)
5603 return latency;
5604 else
5605 return latency + (line_fill_time - wm->active_time);
5606
5607}
5608
5609/**
5610 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
5611 * average and available dram bandwidth
5612 *
5613 * @wm: watermark calculation data
5614 *
5615 * Check if the display average bandwidth fits in the display
5616 * dram bandwidth (CIK).
5617 * Used for display watermark bandwidth calculations
5618 * Returns true if the display fits, false if not.
5619 */
5620static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5621{
5622 if (dce8_average_bandwidth(wm) <=
5623 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
5624 return true;
5625 else
5626 return false;
5627}
5628
5629/**
5630 * dce8_average_bandwidth_vs_available_bandwidth - check
5631 * average and available bandwidth
5632 *
5633 * @wm: watermark calculation data
5634 *
5635 * Check if the display average bandwidth fits in the display
5636 * available bandwidth (CIK).
5637 * Used for display watermark bandwidth calculations
5638 * Returns true if the display fits, false if not.
5639 */
5640static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
5641{
5642 if (dce8_average_bandwidth(wm) <=
5643 (dce8_available_bandwidth(wm) / wm->num_heads))
5644 return true;
5645 else
5646 return false;
5647}
5648
5649/**
5650 * dce8_check_latency_hiding - check latency hiding
5651 *
5652 * @wm: watermark calculation data
5653 *
5654 * Check latency hiding (CIK).
5655 * Used for display watermark bandwidth calculations
5656 * Returns true if the display fits, false if not.
5657 */
5658static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
5659{
5660 u32 lb_partitions = wm->lb_size / wm->src_width;
5661 u32 line_time = wm->active_time + wm->blank_time;
5662 u32 latency_tolerant_lines;
5663 u32 latency_hiding;
5664 fixed20_12 a;
5665
5666 a.full = dfixed_const(1);
5667 if (wm->vsc.full > a.full)
5668 latency_tolerant_lines = 1;
5669 else {
5670 if (lb_partitions <= (wm->vtaps + 1))
5671 latency_tolerant_lines = 1;
5672 else
5673 latency_tolerant_lines = 2;
5674 }
5675
5676 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
5677
5678 if (dce8_latency_watermark(wm) <= latency_hiding)
5679 return true;
5680 else
5681 return false;
5682}
5683
5684/**
5685 * dce8_program_watermarks - program display watermarks
5686 *
5687 * @rdev: radeon_device pointer
5688 * @radeon_crtc: the selected display controller
5689 * @lb_size: line buffer size
5690 * @num_heads: number of display controllers in use
5691 *
5692 * Calculate and program the display watermarks for the
5693 * selected display controller (CIK).
5694 */
5695static void dce8_program_watermarks(struct radeon_device *rdev,
5696 struct radeon_crtc *radeon_crtc,
5697 u32 lb_size, u32 num_heads)
5698{
5699 struct drm_display_mode *mode = &radeon_crtc->base.mode;
5700 struct dce8_wm_params wm;
5701 u32 pixel_period;
5702 u32 line_time = 0;
5703 u32 latency_watermark_a = 0, latency_watermark_b = 0;
5704 u32 tmp, wm_mask;
5705
5706 if (radeon_crtc->base.enabled && num_heads && mode) {
5707 pixel_period = 1000000 / (u32)mode->clock;
5708 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
5709
5710 wm.yclk = rdev->pm.current_mclk * 10;
5711 wm.sclk = rdev->pm.current_sclk * 10;
5712 wm.disp_clk = mode->clock;
5713 wm.src_width = mode->crtc_hdisplay;
5714 wm.active_time = mode->crtc_hdisplay * pixel_period;
5715 wm.blank_time = line_time - wm.active_time;
5716 wm.interlaced = false;
5717 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
5718 wm.interlaced = true;
5719 wm.vsc = radeon_crtc->vsc;
5720 wm.vtaps = 1;
5721 if (radeon_crtc->rmx_type != RMX_OFF)
5722 wm.vtaps = 2;
5723 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
5724 wm.lb_size = lb_size;
5725 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
5726 wm.num_heads = num_heads;
5727
5728 /* set for high clocks */
5729 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
5730 /* set for low clocks */
5731 /* wm.yclk = low clk; wm.sclk = low clk */
5732 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
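		/*
		 * Note: until low clock values are actually plugged in above,
		 * watermark B is computed with the same clocks as watermark A
		 * and the two values come out identical.
		 */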
5733
5734 /* possibly force display priority to high */
5735 /* should really do this at mode validation time... */
5736 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
5737 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
5738 !dce8_check_latency_hiding(&wm) ||
5739 (rdev->disp_priority == 2)) {
5740 DRM_DEBUG_KMS("force priority to high\n");
5741 }
5742 }
5743
5744 /* select wm A */
5745 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5746 tmp = wm_mask;
5747 tmp &= ~LATENCY_WATERMARK_MASK(3);
5748 tmp |= LATENCY_WATERMARK_MASK(1);
5749 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5750 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5751 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
5752 LATENCY_HIGH_WATERMARK(line_time)));
5753 /* select wm B */
5754 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5755 tmp &= ~LATENCY_WATERMARK_MASK(3);
5756 tmp |= LATENCY_WATERMARK_MASK(2);
5757 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5758 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5759 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
5760 LATENCY_HIGH_WATERMARK(line_time)));
5761 /* restore original selection */
5762 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
5763}
5764
5765/**
5766 * dce8_bandwidth_update - program display watermarks
5767 *
5768 * @rdev: radeon_device pointer
5769 *
5770 * Calculate and program the display watermarks and line
5771 * buffer allocation (CIK).
5772 */
5773void dce8_bandwidth_update(struct radeon_device *rdev)
5774{
5775 struct drm_display_mode *mode = NULL;
5776 u32 num_heads = 0, lb_size;
5777 int i;
5778
5779 radeon_update_display_priority(rdev);
5780
5781 for (i = 0; i < rdev->num_crtc; i++) {
5782 if (rdev->mode_info.crtcs[i]->base.enabled)
5783 num_heads++;
5784 }
5785 for (i = 0; i < rdev->num_crtc; i++) {
5786 mode = &rdev->mode_info.crtcs[i]->base.mode;
5787 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
5788 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
5789 }
5790}
Alex Deucher44fa3462012-12-18 22:17:00 -05005791
5792/**
5793 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
5794 *
5795 * @rdev: radeon_device pointer
5796 *
5797 * Fetches a GPU clock counter snapshot (CIK).
5798 * Returns the 64 bit clock counter snapshot.
5799 */
5800uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
5801{
5802 uint64_t clock;
5803
5804 mutex_lock(&rdev->gpu_clock_mutex);
5805 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
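	/* writing 1 above presumably latches the free-running counter so the
	 * LSB/MSB reads below form a consistent 64-bit snapshot */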
5806 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5807 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5808 mutex_unlock(&rdev->gpu_clock_mutex);
5809 return clock;
5810}
5811
Christian König87167bb2013-04-09 13:39:21 -04005812static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
5813 u32 cntl_reg, u32 status_reg)
5814{
5815 int r, i;
5816 struct atom_clock_dividers dividers;
5817 uint32_t tmp;
5818
5819 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
5820 clock, false, &dividers);
5821 if (r)
5822 return r;
5823
5824 tmp = RREG32_SMC(cntl_reg);
5825 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
5826 tmp |= dividers.post_divider;
5827 WREG32_SMC(cntl_reg, tmp);
5828
5829 for (i = 0; i < 100; i++) {
5830 if (RREG32_SMC(status_reg) & DCLK_STATUS)
5831 break;
5832 mdelay(10);
5833 }
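	/* up to ~1 second total (100 iterations * 10 ms) before giving up */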
5834 if (i == 100)
5835 return -ETIMEDOUT;
5836
5837 return 0;
5838}
5839
5840int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
5841{
5842 int r = 0;
5843
5844 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
5845 if (r)
5846 return r;
5847
5848 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
5849 return r;
5850}
5851
5852int cik_uvd_resume(struct radeon_device *rdev)
5853{
5854 uint64_t addr;
5855 uint32_t size;
5856 int r;
5857
5858 r = radeon_uvd_resume(rdev);
5859 if (r)
5860 return r;
5861
5862	/* program the VCPU memory controller bits 0-27 */
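	/*
	 * The VCPU view of memory is three consecutive regions: the firmware
	 * image, RADEON_UVD_STACK_SIZE of stack, then RADEON_UVD_HEAP_SIZE of
	 * heap.  Offsets and sizes are programmed in 8-byte units, hence the
	 * >> 3 shifts.
	 */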
5863 addr = rdev->uvd.gpu_addr >> 3;
5864 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
5865 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
5866 WREG32(UVD_VCPU_CACHE_SIZE0, size);
5867
5868 addr += size;
5869 size = RADEON_UVD_STACK_SIZE >> 3;
5870 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
5871 WREG32(UVD_VCPU_CACHE_SIZE1, size);
5872
5873 addr += size;
5874 size = RADEON_UVD_HEAP_SIZE >> 3;
5875 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
5876 WREG32(UVD_VCPU_CACHE_SIZE2, size);
5877
5878 /* bits 28-31 */
5879 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
5880 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
5881
5882 /* bits 32-39 */
5883 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
5884 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
5885
5886 return 0;
5887}