/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

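/*
 * The PCIE accessors below use the usual index/data register pair; the dummy
 * RREG32() of the index register is presumably there to post the index write
 * before the data port is touched.
 */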
/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

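	/* the MC is only reprogrammed while its ucode engine is halted; note
	 * that the blackout save/restore inside this branch is guarded by the
	 * opposite condition and so can never trigger as written
	 */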
	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

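/*
 * Note: in cik_init_microcode() below, a size mismatch on most images only
 * records -EINVAL and falls through to the next request_firmware() call; the
 * error path after the "out" label then releases whatever was loaded.
 */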
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

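	/* One branch per pipe config: the 4-pipe tables (Bonaire-class parts,
	 * per cik_gpu_init) additionally key off the number of RBs, the 2-pipe
	 * tables cover Kabini-class parts, and the 8-pipe tables are presumably
	 * for larger configurations.
	 */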
	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}

/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}

/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}

/**
 * cik_get_rb_disabled - compute the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

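	/* enabled_rbs now holds a bitmask of the RBs that are actually present;
	 * it is consumed two bits per SH below when programming
	 * PA_SC_RASTER_CONFIG for each SE
	 */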
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

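	/* mirror the address config into the other memory clients (HDP, DMIF,
	 * SDMA, UVD) so they decode addresses the same way the GFX block does
	 */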
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}

/*
 * GPU scratch registers helpers function.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

1669/**
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001670 * cik_fence_ring_emit - emit a fence on the gfx ring
1671 *
1672 * @rdev: radeon_device pointer
1673 * @fence: radeon fence object
1674 *
1675 * Emits a fence sequence number on the gfx ring and flushes
1676 * GPU caches.
1677 */
1678void cik_fence_ring_emit(struct radeon_device *rdev,
1679 struct radeon_fence *fence)
1680{
1681 struct radeon_ring *ring = &rdev->ring[fence->ring];
1682 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1683
1684 /* EVENT_WRITE_EOP - flush caches, send int */
1685 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1686 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1687 EOP_TC_ACTION_EN |
1688 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1689 EVENT_INDEX(5)));
1690 radeon_ring_write(ring, addr & 0xfffffffc);
1691 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1692 radeon_ring_write(ring, fence->seq);
1693 radeon_ring_write(ring, 0);
1694 /* HDP flush */
1695 /* We should be using the new WAIT_REG_MEM special op packet here
1696 * but it causes the CP to hang
1697 */
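	/* Instead, flush HDP by writing 0 to HDP_MEM_COHERENCY_FLUSH_CNTL
	 * with a plain WRITE_DATA packet below (same effect as the MMIO
	 * write in cik_pcie_gart_tlb_flush()).
	 */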
1698 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1699 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1700 WRITE_DATA_DST_SEL(0)));
1701 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1702 radeon_ring_write(ring, 0);
1703 radeon_ring_write(ring, 0);
1704}
1705
1706void cik_semaphore_ring_emit(struct radeon_device *rdev,
1707 struct radeon_ring *ring,
1708 struct radeon_semaphore *semaphore,
1709 bool emit_wait)
1710{
1711 uint64_t addr = semaphore->gpu_addr;
1712 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1713
1714 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1715 radeon_ring_write(ring, addr & 0xffffffff);
1716 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1717}
1718
1719/*
1720 * IB stuff
1721 */
1722/**
1723 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1724 *
1725 * @rdev: radeon_device pointer
1726 * @ib: radeon indirect buffer object
1727 *
1728 * Emits a DE (drawing engine) or CE (constant engine) IB
1729 * on the gfx ring. IBs are usually generated by userspace
1730 * acceleration drivers and submitted to the kernel for
1731 * scheduling on the ring. This function schedules the IB
1732 * on the gfx ring for execution by the GPU.
1733 */
1734void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1735{
1736 struct radeon_ring *ring = &rdev->ring[ib->ring];
1737 u32 header, control = INDIRECT_BUFFER_VALID;
1738
1739 if (ib->is_const_ib) {
1740 /* set switch buffer packet before const IB */
1741 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1742 radeon_ring_write(ring, 0);
1743
1744 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1745 } else {
1746 u32 next_rptr;
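		/* next_rptr accounts for the rptr-update packet written here
		 * (3 dwords for SET_UCONFIG_REG, 5 for WRITE_DATA) plus the
		 * 4-dword INDIRECT_BUFFER packet emitted at the end of this
		 * function, so it points just past the IB packet.
		 */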
1747 if (ring->rptr_save_reg) {
1748 next_rptr = ring->wptr + 3 + 4;
1749 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1750 radeon_ring_write(ring, ((ring->rptr_save_reg -
1751 PACKET3_SET_UCONFIG_REG_START) >> 2));
1752 radeon_ring_write(ring, next_rptr);
1753 } else if (rdev->wb.enabled) {
1754 next_rptr = ring->wptr + 5 + 4;
1755 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1756 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1757 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1758 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1759 radeon_ring_write(ring, next_rptr);
1760 }
1761
1762 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1763 }
1764
1765 control |= ib->length_dw |
1766 (ib->vm ? (ib->vm->id << 24) : 0);
1767
1768 radeon_ring_write(ring, header);
1769 radeon_ring_write(ring,
1770#ifdef __BIG_ENDIAN
1771 (2 << 0) |
1772#endif
1773 (ib->gpu_addr & 0xFFFFFFFC));
1774 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1775 radeon_ring_write(ring, control);
1776}
1777
Alex Deucherfbc832c2012-07-20 14:41:35 -04001778/**
1779 * cik_ib_test - basic gfx ring IB test
1780 *
1781 * @rdev: radeon_device pointer
1782 * @ring: radeon_ring structure holding ring information
1783 *
1784 * Allocate an IB and execute it on the gfx ring (CIK).
1785 * Provides a basic gfx ring test to verify that IBs are working.
1786 * Returns 0 on success, error on failure.
1787 */
1788int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1789{
1790 struct radeon_ib ib;
1791 uint32_t scratch;
1792 uint32_t tmp = 0;
1793 unsigned i;
1794 int r;
1795
1796 r = radeon_scratch_get(rdev, &scratch);
1797 if (r) {
1798 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1799 return r;
1800 }
1801 WREG32(scratch, 0xCAFEDEAD);
1802 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1803 if (r) {
1804 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1805 return r;
1806 }
1807 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1808 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1809 ib.ptr[2] = 0xDEADBEEF;
1810 ib.length_dw = 3;
1811 r = radeon_ib_schedule(rdev, &ib, NULL);
1812 if (r) {
1813 radeon_scratch_free(rdev, scratch);
1814 radeon_ib_free(rdev, &ib);
1815 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1816 return r;
1817 }
1818 r = radeon_fence_wait(ib.fence, false);
1819 if (r) {
1820 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
1821 return r;
1822 }
1823 for (i = 0; i < rdev->usec_timeout; i++) {
1824 tmp = RREG32(scratch);
1825 if (tmp == 0xDEADBEEF)
1826 break;
1827 DRM_UDELAY(1);
1828 }
1829 if (i < rdev->usec_timeout) {
1830 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1831 } else {
1832 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1833 scratch, tmp);
1834 r = -EINVAL;
1835 }
1836 radeon_scratch_free(rdev, scratch);
1837 radeon_ib_free(rdev, &ib);
1838 return r;
1839}
1840
Alex Deucher2cae3bc2012-07-05 11:45:40 -04001841/*
Alex Deucher841cf442012-12-18 21:47:44 -05001842 * CP.
1843 * On CIK, gfx and compute now have independent command processors.
1844 *
1845 * GFX
1846 * Gfx consists of a single ring and can process both gfx jobs and
1847 * compute jobs. The gfx CP consists of three microengines (ME):
1848 * PFP - Pre-Fetch Parser
1849 * ME - Micro Engine
1850 * CE - Constant Engine
1851 * The PFP and ME make up what is considered the Drawing Engine (DE).
1852 * The CE is an asynchronous engine used for updating buffer descriptors
1853 * used by the DE so that they can be loaded into cache in parallel
1854 * while the DE is processing state update packets.
1855 *
1856 * Compute
1857 * The compute CP consists of two microengines (ME):
1858 * MEC1 - Compute MicroEngine 1
1859 * MEC2 - Compute MicroEngine 2
1860 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1861 * The queues are exposed to userspace and are programmed directly
1862 * by the compute runtime.
1863 */
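/* Illustrative arithmetic based on the description above: each MEC provides
 * 4 pipes * 8 queues = 32 compute queues, so a part with both MECs exposes
 * up to 64.  Note that MEC2 ucode is only loaded on KAVERI below.
 */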
1864/**
1865 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1866 *
1867 * @rdev: radeon_device pointer
1868 * @enable: enable or disable the MEs
1869 *
1870 * Halts or unhalts the gfx MEs.
1871 */
1872static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1873{
1874 if (enable)
1875 WREG32(CP_ME_CNTL, 0);
1876 else {
1877 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1878 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1879 }
1880 udelay(50);
1881}
1882
1883/**
1884 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1885 *
1886 * @rdev: radeon_device pointer
1887 *
1888 * Loads the gfx PFP, ME, and CE ucode.
1889 * Returns 0 for success, -EINVAL if the ucode is not available.
1890 */
1891static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1892{
1893 const __be32 *fw_data;
1894 int i;
1895
1896 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1897 return -EINVAL;
1898
1899 cik_cp_gfx_enable(rdev, false);
1900
1901 /* PFP */
1902 fw_data = (const __be32 *)rdev->pfp_fw->data;
1903 WREG32(CP_PFP_UCODE_ADDR, 0);
1904 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1905 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1906 WREG32(CP_PFP_UCODE_ADDR, 0);
1907
1908 /* CE */
1909 fw_data = (const __be32 *)rdev->ce_fw->data;
1910 WREG32(CP_CE_UCODE_ADDR, 0);
1911 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1912 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1913 WREG32(CP_CE_UCODE_ADDR, 0);
1914
1915 /* ME */
1916 fw_data = (const __be32 *)rdev->me_fw->data;
1917 WREG32(CP_ME_RAM_WADDR, 0);
1918 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1919 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1920 WREG32(CP_ME_RAM_WADDR, 0);
1921
1922 WREG32(CP_PFP_UCODE_ADDR, 0);
1923 WREG32(CP_CE_UCODE_ADDR, 0);
1924 WREG32(CP_ME_RAM_WADDR, 0);
1925 WREG32(CP_ME_RAM_RADDR, 0);
1926 return 0;
1927}
1928
1929/**
1930 * cik_cp_gfx_start - start the gfx ring
1931 *
1932 * @rdev: radeon_device pointer
1933 *
1934 * Enables the ring and loads the clear state context and other
1935 * packets required to init the ring.
1936 * Returns 0 for success, error for failure.
1937 */
1938static int cik_cp_gfx_start(struct radeon_device *rdev)
1939{
1940 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1941 int r, i;
1942
1943 /* init the CP */
1944 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1945 WREG32(CP_ENDIAN_SWAP, 0);
1946 WREG32(CP_DEVICE_ID, 1);
1947
1948 cik_cp_gfx_enable(rdev, true);
1949
1950 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1951 if (r) {
1952 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1953 return r;
1954 }
1955
1956 /* init the CE partitions. CE only used for gfx on CIK */
1957 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1958 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1959 radeon_ring_write(ring, 0xc000);
1960 radeon_ring_write(ring, 0xc000);
1961
1962 /* setup clear context state */
1963 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1964 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1965
1966 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1967 radeon_ring_write(ring, 0x80000000);
1968 radeon_ring_write(ring, 0x80000000);
1969
1970 for (i = 0; i < cik_default_size; i++)
1971 radeon_ring_write(ring, cik_default_state[i]);
1972
1973 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1974 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1975
1976 /* set clear context state */
1977 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1978 radeon_ring_write(ring, 0);
1979
1980 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1981 radeon_ring_write(ring, 0x00000316);
1982 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1983 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1984
1985 radeon_ring_unlock_commit(rdev, ring);
1986
1987 return 0;
1988}
1989
1990/**
1991 * cik_cp_gfx_fini - stop the gfx ring
1992 *
1993 * @rdev: radeon_device pointer
1994 *
1995 * Stop the gfx ring and tear down the driver ring
1996 * info.
1997 */
1998static void cik_cp_gfx_fini(struct radeon_device *rdev)
1999{
2000 cik_cp_gfx_enable(rdev, false);
2001 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2002}
2003
2004/**
2005 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2006 *
2007 * @rdev: radeon_device pointer
2008 *
2009 * Program the location and size of the gfx ring buffer
2010 * and test it to make sure it's working.
2011 * Returns 0 for success, error for failure.
2012 */
2013static int cik_cp_gfx_resume(struct radeon_device *rdev)
2014{
2015 struct radeon_ring *ring;
2016 u32 tmp;
2017 u32 rb_bufsz;
2018 u64 rb_addr;
2019 int r;
2020
2021 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2022 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2023
2024 /* Set the write pointer delay */
2025 WREG32(CP_RB_WPTR_DELAY, 0);
2026
2027 /* set the RB to use vmid 0 */
2028 WREG32(CP_RB_VMID, 0);
2029
2030 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2031
2032 /* ring 0 - compute and gfx */
2033 /* Set ring buffer size */
2034 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2035 rb_bufsz = drm_order(ring->ring_size / 8);
2036 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2037#ifdef __BIG_ENDIAN
2038 tmp |= BUF_SWAP_32BIT;
2039#endif
2040 WREG32(CP_RB0_CNTL, tmp);
2041
2042 /* Initialize the ring buffer's read and write pointers */
2043 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2044 ring->wptr = 0;
2045 WREG32(CP_RB0_WPTR, ring->wptr);
2046
2047 /* set the wb address whether it's enabled or not */
2048 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2049 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2050
2051 /* scratch register shadowing is no longer supported */
2052 WREG32(SCRATCH_UMSK, 0);
2053
2054 if (!rdev->wb.enabled)
2055 tmp |= RB_NO_UPDATE;
2056
2057 mdelay(1);
2058 WREG32(CP_RB0_CNTL, tmp);
2059
2060 rb_addr = ring->gpu_addr >> 8;
2061 WREG32(CP_RB0_BASE, rb_addr);
2062 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2063
2064 ring->rptr = RREG32(CP_RB0_RPTR);
2065
2066 /* start the ring */
2067 cik_cp_gfx_start(rdev);
2068 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2069 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2070 if (r) {
2071 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2072 return r;
2073 }
2074 return 0;
2075}
2076
2077/**
2078 * cik_cp_compute_enable - enable/disable the compute CP MEs
2079 *
2080 * @rdev: radeon_device pointer
2081 * @enable: enable or disable the MEs
2082 *
2083 * Halts or unhalts the compute MEs.
2084 */
2085static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2086{
2087 if (enable)
2088 WREG32(CP_MEC_CNTL, 0);
2089 else
2090 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2091 udelay(50);
2092}
2093
2094/**
2095 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2096 *
2097 * @rdev: radeon_device pointer
2098 *
2099 * Loads the compute MEC1&2 ucode.
2100 * Returns 0 for success, -EINVAL if the ucode is not available.
2101 */
2102static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2103{
2104 const __be32 *fw_data;
2105 int i;
2106
2107 if (!rdev->mec_fw)
2108 return -EINVAL;
2109
2110 cik_cp_compute_enable(rdev, false);
2111
2112 /* MEC1 */
2113 fw_data = (const __be32 *)rdev->mec_fw->data;
2114 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2115 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2116 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2117 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2118
2119 if (rdev->family == CHIP_KAVERI) {
2120 /* MEC2 */
2121 fw_data = (const __be32 *)rdev->mec_fw->data;
2122 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2123 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2124 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2125 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2126 }
2127
2128 return 0;
2129}
2130
2131/**
2132 * cik_cp_compute_start - start the compute queues
2133 *
2134 * @rdev: radeon_device pointer
2135 *
2136 * Enable the compute queues.
2137 * Returns 0 for success, error for failure.
2138 */
2139static int cik_cp_compute_start(struct radeon_device *rdev)
2140{
2141 /* todo */
2142 return 0;
2143}
2144
2145/**
2146 * cik_cp_compute_fini - stop the compute queues
2147 *
2148 * @rdev: radeon_device pointer
2149 *
2150 * Stop the compute queues and tear down the driver queue
2151 * info.
2152 */
2153static void cik_cp_compute_fini(struct radeon_device *rdev)
2154{
2155 cik_cp_compute_enable(rdev, false);
2156 /* todo */
2157}
2158
2159/**
2160 * cik_cp_compute_resume - setup the compute queue registers
2161 *
2162 * @rdev: radeon_device pointer
2163 *
2164 * Program the compute queues and test them to make sure they
2165 * are working.
2166 * Returns 0 for success, error for failure.
2167 */
2168static int cik_cp_compute_resume(struct radeon_device *rdev)
2169{
2170 int r;
2171
2172 /* todo */
2173 r = cik_cp_compute_start(rdev);
2174 if (r)
2175 return r;
2176 return 0;
2177}
2178
2179/* XXX temporary wrappers to handle both compute and gfx */
2180/* XXX */
2181static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2182{
2183 cik_cp_gfx_enable(rdev, enable);
2184 cik_cp_compute_enable(rdev, enable);
2185}
2186
2187/* XXX */
2188static int cik_cp_load_microcode(struct radeon_device *rdev)
2189{
2190 int r;
2191
2192 r = cik_cp_gfx_load_microcode(rdev);
2193 if (r)
2194 return r;
2195 r = cik_cp_compute_load_microcode(rdev);
2196 if (r)
2197 return r;
2198
2199 return 0;
2200}
2201
2202/* XXX */
2203static void cik_cp_fini(struct radeon_device *rdev)
2204{
2205 cik_cp_gfx_fini(rdev);
2206 cik_cp_compute_fini(rdev);
2207}
2208
2209/* XXX */
2210static int cik_cp_resume(struct radeon_device *rdev)
2211{
2212 int r;
2213
2214 /* Reset all cp blocks */
2215 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2216 RREG32(GRBM_SOFT_RESET);
2217 mdelay(15);
2218 WREG32(GRBM_SOFT_RESET, 0);
2219 RREG32(GRBM_SOFT_RESET);
2220
2221 r = cik_cp_load_microcode(rdev);
2222 if (r)
2223 return r;
2224
2225 r = cik_cp_gfx_resume(rdev);
2226 if (r)
2227 return r;
2228 r = cik_cp_compute_resume(rdev);
2229 if (r)
2230 return r;
2231
2232 return 0;
2233}
2234
Alex Deucher21a93e12013-04-09 12:47:11 -04002235/*
2236 * sDMA - System DMA
2237 * Starting with CIK, the GPU has new asynchronous
2238 * DMA engines. These engines are used for compute
2239 * and gfx. There are two DMA engines (SDMA0, SDMA1)
2240 * and each one supports 1 ring buffer used for gfx
2241 * and 2 queues used for compute.
2242 *
2243 * The programming model is very similar to the CP
2244 * (ring buffer, IBs, etc.), but sDMA has its own
2245 * packet format that is different from the PM4 format
2246 * used by the CP. sDMA supports copying data, writing
2247 * embedded data, solid fills, and a number of other
2248 * things. It also has support for tiling/detiling of
2249 * buffers.
2250 */
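/* In this driver, the two sDMA gfx rings are tracked as
 * R600_RING_TYPE_DMA_INDEX (SDMA0) and CAYMAN_RING_TYPE_DMA1_INDEX (SDMA1);
 * see cik_sdma_gfx_resume() below.
 */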
2251/**
2252 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2253 *
2254 * @rdev: radeon_device pointer
2255 * @ib: IB object to schedule
2256 *
2257 * Schedule an IB in the DMA ring (CIK).
2258 */
2259void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2260 struct radeon_ib *ib)
2261{
2262 struct radeon_ring *ring = &rdev->ring[ib->ring];
2263 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2264
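	/* Optionally report the post-IB read pointer via write-back; the value
	 * mirrors the NOP padding applied below so it lands just past the
	 * 4-dword INDIRECT_BUFFER packet.
	 */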
2265 if (rdev->wb.enabled) {
2266 u32 next_rptr = ring->wptr + 5;
2267 while ((next_rptr & 7) != 4)
2268 next_rptr++;
2269 next_rptr += 4;
2270 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2271 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2272 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2273 radeon_ring_write(ring, 1); /* number of DWs to follow */
2274 radeon_ring_write(ring, next_rptr);
2275 }
2276
2277 /* IB packet must end on an 8 DW boundary */
2278 while ((ring->wptr & 7) != 4)
2279 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2280 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2281 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2282 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2283 radeon_ring_write(ring, ib->length_dw);
2284
2285}
2286
2287/**
2288 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2289 *
2290 * @rdev: radeon_device pointer
2291 * @fence: radeon fence object
2292 *
2293 * Add a DMA fence packet to the ring to write
2294 * the fence seq number and DMA trap packet to generate
2295 * an interrupt if needed (CIK).
2296 */
2297void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2298 struct radeon_fence *fence)
2299{
2300 struct radeon_ring *ring = &rdev->ring[fence->ring];
2301 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2302 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2303 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2304 u32 ref_and_mask;
2305
2306 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2307 ref_and_mask = SDMA0;
2308 else
2309 ref_and_mask = SDMA1;
2310
2311 /* write the fence */
2312 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2313 radeon_ring_write(ring, addr & 0xffffffff);
2314 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2315 radeon_ring_write(ring, fence->seq);
2316 /* generate an interrupt */
2317 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2318 /* flush HDP */
2319 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2320 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2321 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2322 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2323 radeon_ring_write(ring, ref_and_mask); /* MASK */
2324 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2325}
2326
2327/**
2328 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2329 *
2330 * @rdev: radeon_device pointer
2331 * @ring: radeon_ring structure holding ring information
2332 * @semaphore: radeon semaphore object
2333 * @emit_wait: wait or signal semaphore
2334 *
2335 * Add a DMA semaphore packet to the ring to wait on or signal
2336 * other rings (CIK).
2337 */
2338void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2339 struct radeon_ring *ring,
2340 struct radeon_semaphore *semaphore,
2341 bool emit_wait)
2342{
2343 u64 addr = semaphore->gpu_addr;
2344 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2345
2346 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2347 radeon_ring_write(ring, addr & 0xfffffff8);
2348 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2349}
2350
2351/**
2352 * cik_sdma_gfx_stop - stop the gfx async dma engines
2353 *
2354 * @rdev: radeon_device pointer
2355 *
2356 * Stop the gfx async dma ring buffers (CIK).
2357 */
2358static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2359{
2360 u32 rb_cntl, reg_offset;
2361 int i;
2362
2363 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2364
2365 for (i = 0; i < 2; i++) {
2366 if (i == 0)
2367 reg_offset = SDMA0_REGISTER_OFFSET;
2368 else
2369 reg_offset = SDMA1_REGISTER_OFFSET;
2370 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2371 rb_cntl &= ~SDMA_RB_ENABLE;
2372 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2373 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2374 }
2375}
2376
2377/**
2378 * cik_sdma_rlc_stop - stop the compute async dma engines
2379 *
2380 * @rdev: radeon_device pointer
2381 *
2382 * Stop the compute async dma queues (CIK).
2383 */
2384static void cik_sdma_rlc_stop(struct radeon_device *rdev)
2385{
2386 /* XXX todo */
2387}
2388
2389/**
2390 * cik_sdma_enable - enable/disable the async dma engines
2391 *
2392 * @rdev: radeon_device pointer
2393 * @enable: enable/disable the DMA MEs.
2394 *
2395 * Halt or unhalt the async dma engines (CIK).
2396 */
2397static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2398{
2399 u32 me_cntl, reg_offset;
2400 int i;
2401
2402 for (i = 0; i < 2; i++) {
2403 if (i == 0)
2404 reg_offset = SDMA0_REGISTER_OFFSET;
2405 else
2406 reg_offset = SDMA1_REGISTER_OFFSET;
2407 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2408 if (enable)
2409 me_cntl &= ~SDMA_HALT;
2410 else
2411 me_cntl |= SDMA_HALT;
2412 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2413 }
2414}
2415
2416/**
2417 * cik_sdma_gfx_resume - setup and start the async dma engines
2418 *
2419 * @rdev: radeon_device pointer
2420 *
2421 * Set up the gfx DMA ring buffers and enable them (CIK).
2422 * Returns 0 for success, error for failure.
2423 */
2424static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2425{
2426 struct radeon_ring *ring;
2427 u32 rb_cntl, ib_cntl;
2428 u32 rb_bufsz;
2429 u32 reg_offset, wb_offset;
2430 int i, r;
2431
2432 for (i = 0; i < 2; i++) {
2433 if (i == 0) {
2434 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2435 reg_offset = SDMA0_REGISTER_OFFSET;
2436 wb_offset = R600_WB_DMA_RPTR_OFFSET;
2437 } else {
2438 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2439 reg_offset = SDMA1_REGISTER_OFFSET;
2440 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2441 }
2442
2443 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2444 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2445
2446 /* Set ring buffer size in dwords */
2447 rb_bufsz = drm_order(ring->ring_size / 4);
2448 rb_cntl = rb_bufsz << 1;
2449#ifdef __BIG_ENDIAN
2450 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2451#endif
2452 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2453
2454 /* Initialize the ring buffer's read and write pointers */
2455 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2456 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2457
2458 /* set the wb address whether it's enabled or not */
2459 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2460 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2461 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2462 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2463
2464 if (rdev->wb.enabled)
2465 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2466
2467 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2468 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2469
2470 ring->wptr = 0;
2471 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2472
2473 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2474
2475 /* enable DMA RB */
2476 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2477
2478 ib_cntl = SDMA_IB_ENABLE;
2479#ifdef __BIG_ENDIAN
2480 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2481#endif
2482 /* enable DMA IBs */
2483 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2484
2485 ring->ready = true;
2486
2487 r = radeon_ring_test(rdev, ring->idx, ring);
2488 if (r) {
2489 ring->ready = false;
2490 return r;
2491 }
2492 }
2493
2494 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2495
2496 return 0;
2497}
2498
2499/**
2500 * cik_sdma_rlc_resume - setup and start the async dma engines
2501 *
2502 * @rdev: radeon_device pointer
2503 *
2504 * Set up the compute DMA queues and enable them (CIK).
2505 * Returns 0 for success, error for failure.
2506 */
2507static int cik_sdma_rlc_resume(struct radeon_device *rdev)
2508{
2509 /* XXX todo */
2510 return 0;
2511}
2512
2513/**
2514 * cik_sdma_load_microcode - load the sDMA ME ucode
2515 *
2516 * @rdev: radeon_device pointer
2517 *
2518 * Loads the sDMA0/1 ucode.
2519 * Returns 0 for success, -EINVAL if the ucode is not available.
2520 */
2521static int cik_sdma_load_microcode(struct radeon_device *rdev)
2522{
2523 const __be32 *fw_data;
2524 int i;
2525
2526 if (!rdev->sdma_fw)
2527 return -EINVAL;
2528
2529 /* stop the gfx rings and rlc compute queues */
2530 cik_sdma_gfx_stop(rdev);
2531 cik_sdma_rlc_stop(rdev);
2532
2533 /* halt the MEs */
2534 cik_sdma_enable(rdev, false);
2535
2536 /* sdma0 */
2537 fw_data = (const __be32 *)rdev->sdma_fw->data;
2538 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2539 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2540 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2541 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2542
2543 /* sdma1 */
2544 fw_data = (const __be32 *)rdev->sdma_fw->data;
2545 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2546 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2547 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2548 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2549
2550 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2551 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2552 return 0;
2553}
2554
2555/**
2556 * cik_sdma_resume - setup and start the async dma engines
2557 *
2558 * @rdev: radeon_device pointer
2559 *
2560 * Set up the DMA engines and enable them (CIK).
2561 * Returns 0 for success, error for failure.
2562 */
2563static int cik_sdma_resume(struct radeon_device *rdev)
2564{
2565 int r;
2566
2567 /* Reset dma */
2568 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
2569 RREG32(SRBM_SOFT_RESET);
2570 udelay(50);
2571 WREG32(SRBM_SOFT_RESET, 0);
2572 RREG32(SRBM_SOFT_RESET);
2573
2574 r = cik_sdma_load_microcode(rdev);
2575 if (r)
2576 return r;
2577
2578 /* unhalt the MEs */
2579 cik_sdma_enable(rdev, true);
2580
2581 /* start the gfx rings and rlc compute queues */
2582 r = cik_sdma_gfx_resume(rdev);
2583 if (r)
2584 return r;
2585 r = cik_sdma_rlc_resume(rdev);
2586 if (r)
2587 return r;
2588
2589 return 0;
2590}
2591
2592/**
2593 * cik_sdma_fini - tear down the async dma engines
2594 *
2595 * @rdev: radeon_device pointer
2596 *
2597 * Stop the async dma engines and free the rings (CIK).
2598 */
2599static void cik_sdma_fini(struct radeon_device *rdev)
2600{
2601 /* stop the gfx rings and rlc compute queues */
2602 cik_sdma_gfx_stop(rdev);
2603 cik_sdma_rlc_stop(rdev);
2604 /* halt the MEs */
2605 cik_sdma_enable(rdev, false);
2606 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2607 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
2608 /* XXX - compute dma queue tear down */
2609}
2610
2611/**
2612 * cik_copy_dma - copy pages using the DMA engine
2613 *
2614 * @rdev: radeon_device pointer
2615 * @src_offset: src GPU address
2616 * @dst_offset: dst GPU address
2617 * @num_gpu_pages: number of GPU pages to xfer
2618 * @fence: radeon fence object
2619 *
2620 * Copy GPU pages using the DMA engine (CIK).
2621 * Used by the radeon ttm implementation to move pages if
2622 * registered as the asic copy callback.
2623 */
2624int cik_copy_dma(struct radeon_device *rdev,
2625 uint64_t src_offset, uint64_t dst_offset,
2626 unsigned num_gpu_pages,
2627 struct radeon_fence **fence)
2628{
2629 struct radeon_semaphore *sem = NULL;
2630 int ring_index = rdev->asic->copy.dma_ring_index;
2631 struct radeon_ring *ring = &rdev->ring[ring_index];
2632 u32 size_in_bytes, cur_size_in_bytes;
2633 int i, num_loops;
2634 int r = 0;
2635
2636 r = radeon_semaphore_create(rdev, &sem);
2637 if (r) {
2638 DRM_ERROR("radeon: moving bo (%d).\n", r);
2639 return r;
2640 }
2641
2642 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
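	/* split the copy into chunks of at most 0x1fffff bytes per SDMA
	 * copy packet
	 */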
2643 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
2644 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
2645 if (r) {
2646 DRM_ERROR("radeon: moving bo (%d).\n", r);
2647 radeon_semaphore_free(rdev, &sem, NULL);
2648 return r;
2649 }
2650
2651 if (radeon_fence_need_sync(*fence, ring->idx)) {
2652 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
2653 ring->idx);
2654 radeon_fence_note_sync(*fence, ring->idx);
2655 } else {
2656 radeon_semaphore_free(rdev, &sem, NULL);
2657 }
2658
2659 for (i = 0; i < num_loops; i++) {
2660 cur_size_in_bytes = size_in_bytes;
2661 if (cur_size_in_bytes > 0x1fffff)
2662 cur_size_in_bytes = 0x1fffff;
2663 size_in_bytes -= cur_size_in_bytes;
2664 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
2665 radeon_ring_write(ring, cur_size_in_bytes);
2666 radeon_ring_write(ring, 0); /* src/dst endian swap */
2667 radeon_ring_write(ring, src_offset & 0xffffffff);
2668 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
2669 radeon_ring_write(ring, dst_offset & 0xfffffffc);
2670 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
2671 src_offset += cur_size_in_bytes;
2672 dst_offset += cur_size_in_bytes;
2673 }
2674
2675 r = radeon_fence_emit(rdev, fence, ring->idx);
2676 if (r) {
2677 radeon_ring_unlock_undo(rdev, ring);
2678 return r;
2679 }
2680
2681 radeon_ring_unlock_commit(rdev, ring);
2682 radeon_semaphore_free(rdev, &sem, *fence);
2683
2684 return r;
2685}
2686
2687/**
2688 * cik_sdma_ring_test - simple async dma engine test
2689 *
2690 * @rdev: radeon_device pointer
2691 * @ring: radeon_ring structure holding ring information
2692 *
2693 * Test the DMA engine by using it to write a
2694 * value to memory (CIK).
2695 * Returns 0 for success, error for failure.
2696 */
2697int cik_sdma_ring_test(struct radeon_device *rdev,
2698 struct radeon_ring *ring)
2699{
2700 unsigned i;
2701 int r;
2702 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2703 u32 tmp;
2704
2705 if (!ptr) {
2706 DRM_ERROR("invalid vram scratch pointer\n");
2707 return -EINVAL;
2708 }
2709
2710 tmp = 0xCAFEDEAD;
2711 writel(tmp, ptr);
2712
2713 r = radeon_ring_lock(rdev, ring, 4);
2714 if (r) {
2715 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2716 return r;
2717 }
2718 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2719 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2720 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
2721 radeon_ring_write(ring, 1); /* number of DWs to follow */
2722 radeon_ring_write(ring, 0xDEADBEEF);
2723 radeon_ring_unlock_commit(rdev, ring);
2724
2725 for (i = 0; i < rdev->usec_timeout; i++) {
2726 tmp = readl(ptr);
2727 if (tmp == 0xDEADBEEF)
2728 break;
2729 DRM_UDELAY(1);
2730 }
2731
2732 if (i < rdev->usec_timeout) {
2733 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2734 } else {
2735 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2736 ring->idx, tmp);
2737 r = -EINVAL;
2738 }
2739 return r;
2740}
2741
2742/**
2743 * cik_sdma_ib_test - test an IB on the DMA engine
2744 *
2745 * @rdev: radeon_device pointer
2746 * @ring: radeon_ring structure holding ring information
2747 *
2748 * Test a simple IB in the DMA ring (CIK).
2749 * Returns 0 on success, error on failure.
2750 */
2751int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2752{
2753 struct radeon_ib ib;
2754 unsigned i;
2755 int r;
2756 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2757 u32 tmp = 0;
2758
2759 if (!ptr) {
2760 DRM_ERROR("invalid vram scratch pointer\n");
2761 return -EINVAL;
2762 }
2763
2764 tmp = 0xCAFEDEAD;
2765 writel(tmp, ptr);
2766
2767 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2768 if (r) {
2769 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2770 return r;
2771 }
2772
2773 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2774 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
2775 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
2776 ib.ptr[3] = 1;
2777 ib.ptr[4] = 0xDEADBEEF;
2778 ib.length_dw = 5;
2779
2780 r = radeon_ib_schedule(rdev, &ib, NULL);
2781 if (r) {
2782 radeon_ib_free(rdev, &ib);
2783 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2784 return r;
2785 }
2786 r = radeon_fence_wait(ib.fence, false);
2787 if (r) {
2788 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2789 return r;
2790 }
2791 for (i = 0; i < rdev->usec_timeout; i++) {
2792 tmp = readl(ptr);
2793 if (tmp == 0xDEADBEEF)
2794 break;
2795 DRM_UDELAY(1);
2796 }
2797 if (i < rdev->usec_timeout) {
2798 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2799 } else {
2800 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
2801 r = -EINVAL;
2802 }
2803 radeon_ib_free(rdev, &ib);
2804 return r;
2805}
2806
Alex Deuchercc066712013-04-09 12:59:51 -04002807
2808static void cik_print_gpu_status_regs(struct radeon_device *rdev)
2809{
2810 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2811 RREG32(GRBM_STATUS));
2812 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2813 RREG32(GRBM_STATUS2));
2814 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2815 RREG32(GRBM_STATUS_SE0));
2816 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2817 RREG32(GRBM_STATUS_SE1));
2818 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2819 RREG32(GRBM_STATUS_SE2));
2820 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2821 RREG32(GRBM_STATUS_SE3));
2822 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2823 RREG32(SRBM_STATUS));
2824 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2825 RREG32(SRBM_STATUS2));
2826 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
2827 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
2828 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
2829 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
2830}
2831
Alex Deucher6f2043c2013-04-09 12:43:41 -04002832/**
Alex Deuchercc066712013-04-09 12:59:51 -04002833 * cik_gpu_check_soft_reset - check which blocks are busy
2834 *
2835 * @rdev: radeon_device pointer
2836 *
2837 * Check which blocks are busy and return the relevant reset
2838 * mask to be used by cik_gpu_soft_reset().
2839 * Returns a mask of the blocks to be reset.
2840 */
2841static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
2842{
2843 u32 reset_mask = 0;
2844 u32 tmp;
2845
2846 /* GRBM_STATUS */
2847 tmp = RREG32(GRBM_STATUS);
2848 if (tmp & (PA_BUSY | SC_BUSY |
2849 BCI_BUSY | SX_BUSY |
2850 TA_BUSY | VGT_BUSY |
2851 DB_BUSY | CB_BUSY |
2852 GDS_BUSY | SPI_BUSY |
2853 IA_BUSY | IA_BUSY_NO_DMA))
2854 reset_mask |= RADEON_RESET_GFX;
2855
2856 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
2857 reset_mask |= RADEON_RESET_CP;
2858
2859 /* GRBM_STATUS2 */
2860 tmp = RREG32(GRBM_STATUS2);
2861 if (tmp & RLC_BUSY)
2862 reset_mask |= RADEON_RESET_RLC;
2863
2864 /* SDMA0_STATUS_REG */
2865 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
2866 if (!(tmp & SDMA_IDLE))
2867 reset_mask |= RADEON_RESET_DMA;
2868
2869 /* SDMA1_STATUS_REG */
2870 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
2871 if (!(tmp & SDMA_IDLE))
2872 reset_mask |= RADEON_RESET_DMA1;
2873
2874 /* SRBM_STATUS2 */
2875 tmp = RREG32(SRBM_STATUS2);
2876 if (tmp & SDMA_BUSY)
2877 reset_mask |= RADEON_RESET_DMA;
2878
2879 if (tmp & SDMA1_BUSY)
2880 reset_mask |= RADEON_RESET_DMA1;
2881
2882 /* SRBM_STATUS */
2883 tmp = RREG32(SRBM_STATUS);
2884
2885 if (tmp & IH_BUSY)
2886 reset_mask |= RADEON_RESET_IH;
2887
2888 if (tmp & SEM_BUSY)
2889 reset_mask |= RADEON_RESET_SEM;
2890
2891 if (tmp & GRBM_RQ_PENDING)
2892 reset_mask |= RADEON_RESET_GRBM;
2893
2894 if (tmp & VMC_BUSY)
2895 reset_mask |= RADEON_RESET_VMC;
2896
2897 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
2898 MCC_BUSY | MCD_BUSY))
2899 reset_mask |= RADEON_RESET_MC;
2900
2901 if (evergreen_is_display_hung(rdev))
2902 reset_mask |= RADEON_RESET_DISPLAY;
2903
2904 /* Skip MC reset as it's most likely not hung, just busy */
2905 if (reset_mask & RADEON_RESET_MC) {
2906 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
2907 reset_mask &= ~RADEON_RESET_MC;
2908 }
2909
2910 return reset_mask;
2911}
2912
2913/**
2914 * cik_gpu_soft_reset - soft reset GPU
2915 *
2916 * @rdev: radeon_device pointer
2917 * @reset_mask: mask of which blocks to reset
2918 *
2919 * Soft reset the blocks specified in @reset_mask.
2920 */
2921static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
2922{
2923 struct evergreen_mc_save save;
2924 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
2925 u32 tmp;
2926
2927 if (reset_mask == 0)
2928 return;
2929
2930 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
2931
2932 cik_print_gpu_status_regs(rdev);
2933 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
2934 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
2935 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
2936 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
2937
2938 /* stop the rlc */
2939 cik_rlc_stop(rdev);
2940
2941 /* Disable GFX parsing/prefetching */
2942 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2943
2944 /* Disable MEC parsing/prefetching */
2945 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2946
2947 if (reset_mask & RADEON_RESET_DMA) {
2948 /* sdma0 */
2949 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
2950 tmp |= SDMA_HALT;
2951 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
2952 }
2953 if (reset_mask & RADEON_RESET_DMA1) {
2954 /* sdma1 */
2955 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
2956 tmp |= SDMA_HALT;
2957 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
2958 }
2959
2960 evergreen_mc_stop(rdev, &save);
2961 if (evergreen_mc_wait_for_idle(rdev)) {
2962 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2963 }
2964
2965 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
2966 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
2967
2968 if (reset_mask & RADEON_RESET_CP) {
2969 grbm_soft_reset |= SOFT_RESET_CP;
2970
2971 srbm_soft_reset |= SOFT_RESET_GRBM;
2972 }
2973
2974 if (reset_mask & RADEON_RESET_DMA)
2975 srbm_soft_reset |= SOFT_RESET_SDMA;
2976
2977 if (reset_mask & RADEON_RESET_DMA1)
2978 srbm_soft_reset |= SOFT_RESET_SDMA1;
2979
2980 if (reset_mask & RADEON_RESET_DISPLAY)
2981 srbm_soft_reset |= SOFT_RESET_DC;
2982
2983 if (reset_mask & RADEON_RESET_RLC)
2984 grbm_soft_reset |= SOFT_RESET_RLC;
2985
2986 if (reset_mask & RADEON_RESET_SEM)
2987 srbm_soft_reset |= SOFT_RESET_SEM;
2988
2989 if (reset_mask & RADEON_RESET_IH)
2990 srbm_soft_reset |= SOFT_RESET_IH;
2991
2992 if (reset_mask & RADEON_RESET_GRBM)
2993 srbm_soft_reset |= SOFT_RESET_GRBM;
2994
2995 if (reset_mask & RADEON_RESET_VMC)
2996 srbm_soft_reset |= SOFT_RESET_VMC;
2997
2998 if (!(rdev->flags & RADEON_IS_IGP)) {
2999 if (reset_mask & RADEON_RESET_MC)
3000 srbm_soft_reset |= SOFT_RESET_MC;
3001 }
3002
3003 if (grbm_soft_reset) {
3004 tmp = RREG32(GRBM_SOFT_RESET);
3005 tmp |= grbm_soft_reset;
3006 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3007 WREG32(GRBM_SOFT_RESET, tmp);
3008 tmp = RREG32(GRBM_SOFT_RESET);
3009
3010 udelay(50);
3011
3012 tmp &= ~grbm_soft_reset;
3013 WREG32(GRBM_SOFT_RESET, tmp);
3014 tmp = RREG32(GRBM_SOFT_RESET);
3015 }
3016
3017 if (srbm_soft_reset) {
3018 tmp = RREG32(SRBM_SOFT_RESET);
3019 tmp |= srbm_soft_reset;
3020 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3021 WREG32(SRBM_SOFT_RESET, tmp);
3022 tmp = RREG32(SRBM_SOFT_RESET);
3023
3024 udelay(50);
3025
3026 tmp &= ~srbm_soft_reset;
3027 WREG32(SRBM_SOFT_RESET, tmp);
3028 tmp = RREG32(SRBM_SOFT_RESET);
3029 }
3030
3031 /* Wait a little for things to settle down */
3032 udelay(50);
3033
3034 evergreen_mc_resume(rdev, &save);
3035 udelay(50);
3036
3037 cik_print_gpu_status_regs(rdev);
3038}
3039
3040/**
3041 * cik_asic_reset - soft reset GPU
3042 *
3043 * @rdev: radeon_device pointer
3044 *
3045 * Look up which blocks are hung and attempt
3046 * to reset them.
3047 * Returns 0 for success.
3048 */
3049int cik_asic_reset(struct radeon_device *rdev)
3050{
3051 u32 reset_mask;
3052
3053 reset_mask = cik_gpu_check_soft_reset(rdev);
3054
3055 if (reset_mask)
3056 r600_set_bios_scratch_engine_hung(rdev, true);
3057
3058 cik_gpu_soft_reset(rdev, reset_mask);
3059
3060 reset_mask = cik_gpu_check_soft_reset(rdev);
3061
3062 if (!reset_mask)
3063 r600_set_bios_scratch_engine_hung(rdev, false);
3064
3065 return 0;
3066}
3067
3068/**
3069 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04003070 *
3071 * @rdev: radeon_device pointer
3072 * @ring: radeon_ring structure holding ring information
3073 *
3074 * Check if the 3D engine is locked up (CIK).
3075 * Returns true if the engine is locked, false if not.
3076 */
Alex Deuchercc066712013-04-09 12:59:51 -04003077bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04003078{
Alex Deuchercc066712013-04-09 12:59:51 -04003079 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04003080
Alex Deuchercc066712013-04-09 12:59:51 -04003081 if (!(reset_mask & (RADEON_RESET_GFX |
3082 RADEON_RESET_COMPUTE |
3083 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04003084 radeon_ring_lockup_update(ring);
3085 return false;
3086 }
3087 /* force CP activities */
3088 radeon_ring_force_activity(rdev, ring);
3089 return radeon_ring_test_lockup(rdev, ring);
3090}
3091
3092/**
Alex Deucher21a93e12013-04-09 12:47:11 -04003093 * cik_sdma_is_lockup - Check if the DMA engine is locked up
3094 *
3095 * @rdev: radeon_device pointer
3096 * @ring: radeon_ring structure holding ring information
3097 *
3098 * Check if the async DMA engine is locked up (CIK).
3099 * Returns true if the engine appears to be locked up, false if not.
3100 */
3101bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3102{
Alex Deuchercc066712013-04-09 12:59:51 -04003103 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3104 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04003105
3106 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04003107 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04003108 else
Alex Deuchercc066712013-04-09 12:59:51 -04003109 mask = RADEON_RESET_DMA1;
3110
3111 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04003112 radeon_ring_lockup_update(ring);
3113 return false;
3114 }
3115 /* force ring activities */
3116 radeon_ring_force_activity(rdev, ring);
3117 return radeon_ring_test_lockup(rdev, ring);
3118}
3119
Alex Deucher1c491652013-04-09 12:45:26 -04003120/* MC */
3121/**
3122 * cik_mc_program - program the GPU memory controller
3123 *
3124 * @rdev: radeon_device pointer
3125 *
3126 * Set the location of vram, gart, and AGP in the GPU's
3127 * physical address space (CIK).
3128 */
3129static void cik_mc_program(struct radeon_device *rdev)
3130{
3131 struct evergreen_mc_save save;
3132 u32 tmp;
3133 int i, j;
3134
3135 /* Initialize HDP */
3136 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3137 WREG32((0x2c14 + j), 0x00000000);
3138 WREG32((0x2c18 + j), 0x00000000);
3139 WREG32((0x2c1c + j), 0x00000000);
3140 WREG32((0x2c20 + j), 0x00000000);
3141 WREG32((0x2c24 + j), 0x00000000);
3142 }
3143 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3144
3145 evergreen_mc_stop(rdev, &save);
3146 if (radeon_mc_wait_for_idle(rdev)) {
3147 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3148 }
3149 /* Lockout access through VGA aperture*/
3150 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3151 /* Update configuration */
3152 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3153 rdev->mc.vram_start >> 12);
3154 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3155 rdev->mc.vram_end >> 12);
3156 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3157 rdev->vram_scratch.gpu_addr >> 12);
3158 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3159 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3160 WREG32(MC_VM_FB_LOCATION, tmp);
3161 /* XXX double check these! */
3162 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3163 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3164 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3165 WREG32(MC_VM_AGP_BASE, 0);
3166 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3167 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3168 if (radeon_mc_wait_for_idle(rdev)) {
3169 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3170 }
3171 evergreen_mc_resume(rdev, &save);
3172 /* we need to own VRAM, so turn off the VGA renderer here
3173 * to stop it overwriting our objects */
3174 rv515_vga_render_disable(rdev);
3175}
3176
3177/**
3178 * cik_mc_init - initialize the memory controller driver params
3179 *
3180 * @rdev: radeon_device pointer
3181 *
3182 * Look up the amount of vram, vram width, and decide how to place
3183 * vram and gart within the GPU's physical address space (CIK).
3184 * Returns 0 for success.
3185 */
3186static int cik_mc_init(struct radeon_device *rdev)
3187{
3188 u32 tmp;
3189 int chansize, numchan;
3190
3191 /* Get VRAM information */
3192 rdev->mc.vram_is_ddr = true;
3193 tmp = RREG32(MC_ARB_RAMCFG);
3194 if (tmp & CHANSIZE_MASK) {
3195 chansize = 64;
3196 } else {
3197 chansize = 32;
3198 }
3199 tmp = RREG32(MC_SHARED_CHMAP);
3200 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3201 case 0:
3202 default:
3203 numchan = 1;
3204 break;
3205 case 1:
3206 numchan = 2;
3207 break;
3208 case 2:
3209 numchan = 4;
3210 break;
3211 case 3:
3212 numchan = 8;
3213 break;
3214 case 4:
3215 numchan = 3;
3216 break;
3217 case 5:
3218 numchan = 6;
3219 break;
3220 case 6:
3221 numchan = 10;
3222 break;
3223 case 7:
3224 numchan = 12;
3225 break;
3226 case 8:
3227 numchan = 16;
3228 break;
3229 }
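	/* effective vram bus width = number of channels * bits per channel */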
3230 rdev->mc.vram_width = numchan * chansize;
3231 /* Could aperture size report 0? */
3232 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3233 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3234 /* size in MB on CIK */
3235 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3236 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3237 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3238 si_vram_gtt_location(rdev, &rdev->mc);
3239 radeon_update_bandwidth_info(rdev);
3240
3241 return 0;
3242}
3243
3244/*
3245 * GART
3246 * VMID 0 is the physical GPU addresses as used by the kernel.
3247 * VMIDs 1-15 are used for userspace clients and are handled
3248 * by the radeon vm/hsa code.
3249 */
3250/**
3251 * cik_pcie_gart_tlb_flush - gart tlb flush callback
3252 *
3253 * @rdev: radeon_device pointer
3254 *
3255 * Flush the TLB for the VMID 0 page table (CIK).
3256 */
3257void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3258{
3259 /* flush hdp cache */
3260 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3261
3262 /* bits 0-15 are the VM contexts0-15 */
3263 WREG32(VM_INVALIDATE_REQUEST, 0x1);
3264}
3265
3266/**
3267 * cik_pcie_gart_enable - gart enable
3268 *
3269 * @rdev: radeon_device pointer
3270 *
3271 * This sets up the TLBs, programs the page tables for VMID0,
3272 * sets up the hw for VMIDs 1-15 which are allocated on
3273 * demand, and sets up the global locations for the LDS, GDS,
3274 * and GPUVM for FSA64 clients (CIK).
3275 * Returns 0 for success, errors for failure.
3276 */
3277static int cik_pcie_gart_enable(struct radeon_device *rdev)
3278{
3279 int r, i;
3280
3281 if (rdev->gart.robj == NULL) {
3282 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3283 return -EINVAL;
3284 }
3285 r = radeon_gart_table_vram_pin(rdev);
3286 if (r)
3287 return r;
3288 radeon_gart_restore(rdev);
3289 /* Setup TLB control */
3290 WREG32(MC_VM_MX_L1_TLB_CNTL,
3291 (0xA << 7) |
3292 ENABLE_L1_TLB |
3293 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3294 ENABLE_ADVANCED_DRIVER_MODEL |
3295 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3296 /* Setup L2 cache */
3297 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3298 ENABLE_L2_FRAGMENT_PROCESSING |
3299 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3300 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3301 EFFECTIVE_L2_QUEUE_SIZE(7) |
3302 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3303 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3304 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3305 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3306 /* setup context0 */
3307 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3308 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3309 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3310 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3311 (u32)(rdev->dummy_page.addr >> 12));
3312 WREG32(VM_CONTEXT0_CNTL2, 0);
3313 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3314 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3315
3316 WREG32(0x15D4, 0);
3317 WREG32(0x15D8, 0);
3318 WREG32(0x15DC, 0);
3319
3320 /* empty context1-15 */
3321 /* FIXME: start with 4GB; once we use 2-level page tables, switch to the full
3322 * VM size space
3323 */
3324 /* set vm size, must be a multiple of 4 */
3325 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3326 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3327 for (i = 1; i < 16; i++) {
3328 if (i < 8)
3329 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3330 rdev->gart.table_addr >> 12);
3331 else
3332 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3333 rdev->gart.table_addr >> 12);
3334 }
3335
3336 /* enable context1-15 */
3337 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3338 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04003339 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04003340 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04003341 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3342 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3343 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3344 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3345 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3346 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3347 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3348 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3349 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3350 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3351 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3352 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04003353
3354 /* TC cache setup ??? */
3355 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3356 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3357 WREG32(TC_CFG_L1_STORE_POLICY, 0);
3358
3359 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3360 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3361 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3362 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3363 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3364
3365 WREG32(TC_CFG_L1_VOLATILE, 0);
3366 WREG32(TC_CFG_L2_VOLATILE, 0);
3367
3368 if (rdev->family == CHIP_KAVERI) {
3369 u32 tmp = RREG32(CHUB_CONTROL);
3370 tmp &= ~BYPASS_VM;
3371 WREG32(CHUB_CONTROL, tmp);
3372 }
3373
3374 /* XXX SH_MEM regs */
3375 /* where to put LDS, scratch, GPUVM in FSA64 space */
3376 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05003377 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04003378 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04003379 WREG32(SH_MEM_CONFIG, 0);
3380 WREG32(SH_MEM_APE1_BASE, 1);
3381 WREG32(SH_MEM_APE1_LIMIT, 0);
3382 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04003383 /* SDMA GFX */
3384 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3385 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3386 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3387 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3388 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04003389 }
Alex Deucherb556b122013-01-29 10:44:22 -05003390 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucher1c491652013-04-09 12:45:26 -04003391
3392 cik_pcie_gart_tlb_flush(rdev);
3393 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3394 (unsigned)(rdev->mc.gtt_size >> 20),
3395 (unsigned long long)rdev->gart.table_addr);
3396 rdev->gart.ready = true;
3397 return 0;
3398}
3399
3400/**
3401 * cik_pcie_gart_disable - gart disable
3402 *
3403 * @rdev: radeon_device pointer
3404 *
3405 * This disables all VM page tables (CIK).
3406 */
3407static void cik_pcie_gart_disable(struct radeon_device *rdev)
3408{
3409 /* Disable all tables */
3410 WREG32(VM_CONTEXT0_CNTL, 0);
3411 WREG32(VM_CONTEXT1_CNTL, 0);
3412 /* Setup TLB control */
3413 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3414 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3415 /* Setup L2 cache */
3416 WREG32(VM_L2_CNTL,
3417 ENABLE_L2_FRAGMENT_PROCESSING |
3418 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3419 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3420 EFFECTIVE_L2_QUEUE_SIZE(7) |
3421 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3422 WREG32(VM_L2_CNTL2, 0);
3423 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3424 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3425 radeon_gart_table_vram_unpin(rdev);
3426}
3427
3428/**
3429 * cik_pcie_gart_fini - vm fini callback
3430 *
3431 * @rdev: radeon_device pointer
3432 *
3433 * Tears down the driver GART/VM setup (CIK).
3434 */
3435static void cik_pcie_gart_fini(struct radeon_device *rdev)
3436{
3437 cik_pcie_gart_disable(rdev);
3438 radeon_gart_table_vram_free(rdev);
3439 radeon_gart_fini(rdev);
3440}
3441
3442/* vm parser */
3443/**
3444 * cik_ib_parse - vm ib_parse callback
3445 *
3446 * @rdev: radeon_device pointer
3447 * @ib: indirect buffer pointer
3448 *
3449 * CIK uses hw IB checking so this is a nop (CIK).
3450 */
3451int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3452{
3453 return 0;
3454}
3455
3456/*
3457 * vm
3458 * VMID 0 is the physical GPU addresses as used by the kernel.
3459 * VMIDs 1-15 are used for userspace clients and are handled
3460 * by the radeon vm/hsa code.
3461 */
3462/**
3463 * cik_vm_init - cik vm init callback
3464 *
3465 * @rdev: radeon_device pointer
3466 *
3467 * Inits cik specific vm parameters (number of VMs, base of vram for
3468 * VMIDs 1-15) (CIK).
3469 * Returns 0 for success.
3470 */
3471int cik_vm_init(struct radeon_device *rdev)
3472{
3473 /* number of VMs */
3474 rdev->vm_manager.nvm = 16;
3475 /* base offset of vram pages */
3476 if (rdev->flags & RADEON_IS_IGP) {
3477 u64 tmp = RREG32(MC_VM_FB_OFFSET);
3478 tmp <<= 22;
3479 rdev->vm_manager.vram_base_offset = tmp;
3480 } else
3481 rdev->vm_manager.vram_base_offset = 0;
3482
3483 return 0;
3484}
3485
3486/**
3487 * cik_vm_fini - cik vm fini callback
3488 *
3489 * @rdev: radeon_device pointer
3490 *
3491 * Tear down any asic specific VM setup (CIK).
3492 */
3493void cik_vm_fini(struct radeon_device *rdev)
3494{
3495}
3496
 3497/**
3498 * cik_vm_flush - cik vm flush using the CP
3499 *
3500 * @rdev: radeon_device pointer
3501 *
3502 * Update the page table base and flush the VM TLB
3503 * using the CP (CIK).
3504 */
3505void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3506{
3507 struct radeon_ring *ring = &rdev->ring[ridx];
3508
3509 if (vm == NULL)
3510 return;
3511
3512 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3513 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3514 WRITE_DATA_DST_SEL(0)));
3515 if (vm->id < 8) {
3516 radeon_ring_write(ring,
3517 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3518 } else {
3519 radeon_ring_write(ring,
3520 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3521 }
3522 radeon_ring_write(ring, 0);
3523 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3524
3525 /* update SH_MEM_* regs */
3526 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3527 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3528 WRITE_DATA_DST_SEL(0)));
3529 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3530 radeon_ring_write(ring, 0);
3531 radeon_ring_write(ring, VMID(vm->id));
3532
3533 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
3534 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3535 WRITE_DATA_DST_SEL(0)));
3536 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3537 radeon_ring_write(ring, 0);
3538
3539 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
3540 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
3541 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
3542 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
3543
3544 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3545 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3546 WRITE_DATA_DST_SEL(0)));
3547 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3548 radeon_ring_write(ring, 0);
3549 radeon_ring_write(ring, VMID(0));
3550
3551 /* HDP flush */
3552 /* We should be using the WAIT_REG_MEM packet here like in
3553 * cik_fence_ring_emit(), but it causes the CP to hang in this
3554 * context...
3555 */
3556 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3557 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3558 WRITE_DATA_DST_SEL(0)));
3559 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3560 radeon_ring_write(ring, 0);
3561 radeon_ring_write(ring, 0);
3562
3563 /* bits 0-15 are the VM contexts0-15 */
3564 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3565 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3566 WRITE_DATA_DST_SEL(0)));
3567 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3568 radeon_ring_write(ring, 0);
3569 radeon_ring_write(ring, 1 << vm->id);
3570
3571 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3572 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3573 radeon_ring_write(ring, 0x0);
3574}
3575
 3576/**
 3577 * cik_vm_set_page - update the page tables using CP or sDMA
3578 *
3579 * @rdev: radeon_device pointer
3580 * @ib: indirect buffer to fill with commands
3581 * @pe: addr of the page entry
3582 * @addr: dst addr to write into pe
3583 * @count: number of page entries to update
3584 * @incr: increase next addr by incr bytes
3585 * @flags: access flags
3586 *
3587 * Update the page tables using CP or sDMA (CIK).
3588 */
3589void cik_vm_set_page(struct radeon_device *rdev,
3590 struct radeon_ib *ib,
3591 uint64_t pe,
3592 uint64_t addr, unsigned count,
3593 uint32_t incr, uint32_t flags)
3594{
3595 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3596 uint64_t value;
3597 unsigned ndw;
3598
3599 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3600 /* CP */
3601 while (count) {
3602 ndw = 2 + count * 2;
3603 if (ndw > 0x3FFE)
3604 ndw = 0x3FFE;
3605
3606 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3607 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3608 WRITE_DATA_DST_SEL(1));
3609 ib->ptr[ib->length_dw++] = pe;
3610 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3611 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3612 if (flags & RADEON_VM_PAGE_SYSTEM) {
3613 value = radeon_vm_map_gart(rdev, addr);
3614 value &= 0xFFFFFFFFFFFFF000ULL;
3615 } else if (flags & RADEON_VM_PAGE_VALID) {
3616 value = addr;
3617 } else {
3618 value = 0;
3619 }
3620 addr += incr;
3621 value |= r600_flags;
3622 ib->ptr[ib->length_dw++] = value;
3623 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3624 }
3625 }
3626 } else {
3627 /* DMA */
3628 if (flags & RADEON_VM_PAGE_SYSTEM) {
3629 while (count) {
3630 ndw = count * 2;
3631 if (ndw > 0xFFFFE)
3632 ndw = 0xFFFFE;
3633
3634 /* for non-physically contiguous pages (system) */
3635 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3636 ib->ptr[ib->length_dw++] = pe;
3637 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3638 ib->ptr[ib->length_dw++] = ndw;
3639 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3640 if (flags & RADEON_VM_PAGE_SYSTEM) {
3641 value = radeon_vm_map_gart(rdev, addr);
3642 value &= 0xFFFFFFFFFFFFF000ULL;
3643 } else if (flags & RADEON_VM_PAGE_VALID) {
3644 value = addr;
3645 } else {
3646 value = 0;
3647 }
3648 addr += incr;
3649 value |= r600_flags;
3650 ib->ptr[ib->length_dw++] = value;
3651 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3652 }
3653 }
3654 } else {
3655 while (count) {
3656 ndw = count;
3657 if (ndw > 0x7FFFF)
3658 ndw = 0x7FFFF;
3659
3660 if (flags & RADEON_VM_PAGE_VALID)
3661 value = addr;
3662 else
3663 value = 0;
3664 /* for physically contiguous pages (vram) */
3665 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
3666 ib->ptr[ib->length_dw++] = pe; /* dst addr */
3667 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3668 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3669 ib->ptr[ib->length_dw++] = 0;
3670 ib->ptr[ib->length_dw++] = value; /* value */
3671 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3672 ib->ptr[ib->length_dw++] = incr; /* increment size */
3673 ib->ptr[ib->length_dw++] = 0;
3674 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
3675 pe += ndw * 8;
3676 addr += ndw * incr;
3677 count -= ndw;
3678 }
3679 }
3680 while (ib->length_dw & 0x7)
3681 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
3682 }
3683}
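/*
 * Illustrative sketch, not part of the driver: the IB sizing math behind
 * the CP WRITE_DATA path in cik_vm_set_page() above.  Each WRITE_DATA
 * packet emits 4 header/address dwords plus 2 dwords per 64-bit entry,
 * and the packet count cap of 0x3FFE limits a single packet to
 * (0x3FFE - 2) / 2 = 8190 entries.  The helper name is local to this
 * example.
 */
static unsigned cik_vm_set_page_cp_ib_dwords_example(unsigned count)
{
	const unsigned max_entries = (0x3FFE - 2) / 2;
	unsigned dw = 0;

	while (count) {
		unsigned n = (count > max_entries) ? max_entries : count;

		/* header, engine/dst sel, dst addr lo/hi, then the entries */
		dw += 4 + 2 * n;
		count -= n;
	}
	return dw;
}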
3684
3685/**
 3686 * cik_dma_vm_flush - cik vm flush using sDMA
3687 *
3688 * @rdev: radeon_device pointer
3689 *
3690 * Update the page table base and flush the VM TLB
3691 * using sDMA (CIK).
3692 */
3693void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3694{
3695 struct radeon_ring *ring = &rdev->ring[ridx];
3696 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3697 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3698 u32 ref_and_mask;
3699
3700 if (vm == NULL)
3701 return;
3702
3703 if (ridx == R600_RING_TYPE_DMA_INDEX)
3704 ref_and_mask = SDMA0;
3705 else
3706 ref_and_mask = SDMA1;
3707
3708 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3709 if (vm->id < 8) {
3710 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3711 } else {
3712 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3713 }
3714 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3715
3716 /* update SH_MEM_* regs */
3717 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3718 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3719 radeon_ring_write(ring, VMID(vm->id));
3720
3721 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3722 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3723 radeon_ring_write(ring, 0);
3724
3725 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3726 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
3727 radeon_ring_write(ring, 0);
3728
3729 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3730 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
3731 radeon_ring_write(ring, 1);
3732
3733 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3734 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
3735 radeon_ring_write(ring, 0);
3736
3737 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3738 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3739 radeon_ring_write(ring, VMID(0));
3740
3741 /* flush HDP */
3742 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3743 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3744 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3745 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3746 radeon_ring_write(ring, ref_and_mask); /* MASK */
3747 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3748
3749 /* flush TLB */
3750 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3751 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3752 radeon_ring_write(ring, 1 << vm->id);
3753}
3754
 3755/*
3756 * RLC
3757 * The RLC is a multi-purpose microengine that handles a
3758 * variety of functions, the most important of which is
3759 * the interrupt controller.
3760 */
3761/**
3762 * cik_rlc_stop - stop the RLC ME
3763 *
3764 * @rdev: radeon_device pointer
3765 *
3766 * Halt the RLC ME (MicroEngine) (CIK).
3767 */
3768static void cik_rlc_stop(struct radeon_device *rdev)
3769{
3770 int i, j, k;
3771 u32 mask, tmp;
3772
3773 tmp = RREG32(CP_INT_CNTL_RING0);
3774 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3775 WREG32(CP_INT_CNTL_RING0, tmp);
3776
3777 RREG32(CB_CGTT_SCLK_CTRL);
3778 RREG32(CB_CGTT_SCLK_CTRL);
3779 RREG32(CB_CGTT_SCLK_CTRL);
3780 RREG32(CB_CGTT_SCLK_CTRL);
3781
3782 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3783 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
3784
3785 WREG32(RLC_CNTL, 0);
3786
3787 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3788 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3789 cik_select_se_sh(rdev, i, j);
3790 for (k = 0; k < rdev->usec_timeout; k++) {
3791 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
3792 break;
3793 udelay(1);
3794 }
3795 }
3796 }
3797 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3798
3799 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
3800 for (k = 0; k < rdev->usec_timeout; k++) {
3801 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3802 break;
3803 udelay(1);
3804 }
3805}
3806
3807/**
3808 * cik_rlc_start - start the RLC ME
3809 *
3810 * @rdev: radeon_device pointer
3811 *
3812 * Unhalt the RLC ME (MicroEngine) (CIK).
3813 */
3814static void cik_rlc_start(struct radeon_device *rdev)
3815{
3816 u32 tmp;
3817
3818 WREG32(RLC_CNTL, RLC_ENABLE);
3819
3820 tmp = RREG32(CP_INT_CNTL_RING0);
3821 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3822 WREG32(CP_INT_CNTL_RING0, tmp);
3823
3824 udelay(50);
3825}
3826
3827/**
3828 * cik_rlc_resume - setup the RLC hw
3829 *
3830 * @rdev: radeon_device pointer
3831 *
3832 * Initialize the RLC registers, load the ucode,
3833 * and start the RLC (CIK).
3834 * Returns 0 for success, -EINVAL if the ucode is not available.
3835 */
3836static int cik_rlc_resume(struct radeon_device *rdev)
3837{
3838 u32 i, size;
3839 u32 clear_state_info[3];
3840 const __be32 *fw_data;
3841
3842 if (!rdev->rlc_fw)
3843 return -EINVAL;
3844
3845 switch (rdev->family) {
3846 case CHIP_BONAIRE:
3847 default:
3848 size = BONAIRE_RLC_UCODE_SIZE;
3849 break;
3850 case CHIP_KAVERI:
3851 size = KV_RLC_UCODE_SIZE;
3852 break;
3853 case CHIP_KABINI:
3854 size = KB_RLC_UCODE_SIZE;
3855 break;
3856 }
3857
3858 cik_rlc_stop(rdev);
3859
3860 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
3861 RREG32(GRBM_SOFT_RESET);
3862 udelay(50);
3863 WREG32(GRBM_SOFT_RESET, 0);
3864 RREG32(GRBM_SOFT_RESET);
3865 udelay(50);
3866
3867 WREG32(RLC_LB_CNTR_INIT, 0);
3868 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
3869
3870 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3871 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
3872 WREG32(RLC_LB_PARAMS, 0x00600408);
3873 WREG32(RLC_LB_CNTL, 0x80000004);
3874
3875 WREG32(RLC_MC_CNTL, 0);
3876 WREG32(RLC_UCODE_CNTL, 0);
3877
3878 fw_data = (const __be32 *)rdev->rlc_fw->data;
3879 WREG32(RLC_GPM_UCODE_ADDR, 0);
3880 for (i = 0; i < size; i++)
3881 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
3882 WREG32(RLC_GPM_UCODE_ADDR, 0);
3883
3884 /* XXX */
3885 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
3886 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
3887 clear_state_info[2] = 0;//cik_default_size;
3888 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
3889 for (i = 0; i < 3; i++)
3890 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
3891 WREG32(RLC_DRIVER_DMA_STATUS, 0);
3892
3893 cik_rlc_start(rdev);
3894
3895 return 0;
3896}
 3897
3898/*
3899 * Interrupts
3900 * Starting with r6xx, interrupts are handled via a ring buffer.
3901 * Ring buffers are areas of GPU accessible memory that the GPU
3902 * writes interrupt vectors into and the host reads vectors out of.
3903 * There is a rptr (read pointer) that determines where the
3904 * host is currently reading, and a wptr (write pointer)
3905 * which determines where the GPU has written. When the
3906 * pointers are equal, the ring is idle. When the GPU
3907 * writes vectors to the ring buffer, it increments the
3908 * wptr. When there is an interrupt, the host then starts
3909 * fetching commands and processing them until the pointers are
 3910 * fetching vectors and processing them until the pointers are
3911 */
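/*
 * Minimal sketch, not driver code: the rptr/wptr bookkeeping described
 * above, assuming 16-byte entries and a power-of-two ring size.  The
 * struct and helpers are local to this example.
 */
struct cik_ih_ring_example {
	u32 rptr;	/* next byte the host will read */
	u32 wptr;	/* next byte the GPU will write */
	u32 ptr_mask;	/* ring size in bytes - 1 */
};

static bool cik_ih_ring_example_empty(const struct cik_ih_ring_example *ih)
{
	return ih->rptr == ih->wptr;
}

static void cik_ih_ring_example_consume(struct cik_ih_ring_example *ih)
{
	/* each IV ring entry is 128 bits (16 bytes) */
	ih->rptr = (ih->rptr + 16) & ih->ptr_mask;
}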
3912
3913/**
3914 * cik_enable_interrupts - Enable the interrupt ring buffer
3915 *
3916 * @rdev: radeon_device pointer
3917 *
3918 * Enable the interrupt ring buffer (CIK).
3919 */
3920static void cik_enable_interrupts(struct radeon_device *rdev)
3921{
3922 u32 ih_cntl = RREG32(IH_CNTL);
3923 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3924
3925 ih_cntl |= ENABLE_INTR;
3926 ih_rb_cntl |= IH_RB_ENABLE;
3927 WREG32(IH_CNTL, ih_cntl);
3928 WREG32(IH_RB_CNTL, ih_rb_cntl);
3929 rdev->ih.enabled = true;
3930}
3931
3932/**
3933 * cik_disable_interrupts - Disable the interrupt ring buffer
3934 *
3935 * @rdev: radeon_device pointer
3936 *
3937 * Disable the interrupt ring buffer (CIK).
3938 */
3939static void cik_disable_interrupts(struct radeon_device *rdev)
3940{
3941 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3942 u32 ih_cntl = RREG32(IH_CNTL);
3943
3944 ih_rb_cntl &= ~IH_RB_ENABLE;
3945 ih_cntl &= ~ENABLE_INTR;
3946 WREG32(IH_RB_CNTL, ih_rb_cntl);
3947 WREG32(IH_CNTL, ih_cntl);
3948 /* set rptr, wptr to 0 */
3949 WREG32(IH_RB_RPTR, 0);
3950 WREG32(IH_RB_WPTR, 0);
3951 rdev->ih.enabled = false;
3952 rdev->ih.rptr = 0;
3953}
3954
3955/**
3956 * cik_disable_interrupt_state - Disable all interrupt sources
3957 *
3958 * @rdev: radeon_device pointer
3959 *
3960 * Clear all interrupt enable bits used by the driver (CIK).
3961 */
3962static void cik_disable_interrupt_state(struct radeon_device *rdev)
3963{
3964 u32 tmp;
3965
3966 /* gfx ring */
3967 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
 3968	/* sdma */
3969 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3970 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3971 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3972 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
 3973	/* compute queues */
3974 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
3975 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
3976 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
3977 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
3978 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
3979 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
3980 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
3981 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
3982 /* grbm */
3983 WREG32(GRBM_INT_CNTL, 0);
3984 /* vline/vblank, etc. */
3985 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3986 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3987 if (rdev->num_crtc >= 4) {
3988 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3989 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3990 }
3991 if (rdev->num_crtc >= 6) {
3992 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3993 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3994 }
3995
3996 /* dac hotplug */
3997 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
3998
3999 /* digital hotplug */
4000 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4001 WREG32(DC_HPD1_INT_CONTROL, tmp);
4002 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4003 WREG32(DC_HPD2_INT_CONTROL, tmp);
4004 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4005 WREG32(DC_HPD3_INT_CONTROL, tmp);
4006 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4007 WREG32(DC_HPD4_INT_CONTROL, tmp);
4008 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4009 WREG32(DC_HPD5_INT_CONTROL, tmp);
4010 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4011 WREG32(DC_HPD6_INT_CONTROL, tmp);
4012
4013}
4014
4015/**
4016 * cik_irq_init - init and enable the interrupt ring
4017 *
4018 * @rdev: radeon_device pointer
4019 *
4020 * Allocate a ring buffer for the interrupt controller,
 4021 * disable interrupts, enable the RLC, set up the IH
 4022 * ring buffer and enable it (CIK).
 4023 * Called at device load and resume.
4024 * Returns 0 for success, errors for failure.
4025 */
4026static int cik_irq_init(struct radeon_device *rdev)
4027{
4028 int ret = 0;
4029 int rb_bufsz;
4030 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4031
4032 /* allocate ring */
4033 ret = r600_ih_ring_alloc(rdev);
4034 if (ret)
4035 return ret;
4036
4037 /* disable irqs */
4038 cik_disable_interrupts(rdev);
4039
4040 /* init rlc */
4041 ret = cik_rlc_resume(rdev);
4042 if (ret) {
4043 r600_ih_ring_fini(rdev);
4044 return ret;
4045 }
4046
4047 /* setup interrupt control */
4048 /* XXX this should actually be a bus address, not an MC address. same on older asics */
4049 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4050 interrupt_cntl = RREG32(INTERRUPT_CNTL);
4051 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4052 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4053 */
4054 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4055 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4056 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4057 WREG32(INTERRUPT_CNTL, interrupt_cntl);
4058
4059 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
4060 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4061
4062 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4063 IH_WPTR_OVERFLOW_CLEAR |
4064 (rb_bufsz << 1));
4065
4066 if (rdev->wb.enabled)
4067 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4068
4069 /* set the writeback address whether it's enabled or not */
4070 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4071 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4072
4073 WREG32(IH_RB_CNTL, ih_rb_cntl);
4074
4075 /* set rptr, wptr to 0 */
4076 WREG32(IH_RB_RPTR, 0);
4077 WREG32(IH_RB_WPTR, 0);
4078
4079 /* Default settings for IH_CNTL (disabled at first) */
4080 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4081 /* RPTR_REARM only works if msi's are enabled */
4082 if (rdev->msi_enabled)
4083 ih_cntl |= RPTR_REARM;
4084 WREG32(IH_CNTL, ih_cntl);
4085
4086 /* force the active interrupt state to all disabled */
4087 cik_disable_interrupt_state(rdev);
4088
4089 pci_set_master(rdev->pdev);
4090
4091 /* enable irqs */
4092 cik_enable_interrupts(rdev);
4093
4094 return ret;
4095}
4096
4097/**
4098 * cik_irq_set - enable/disable interrupt sources
4099 *
4100 * @rdev: radeon_device pointer
4101 *
4102 * Enable interrupt sources on the GPU (vblanks, hpd,
4103 * etc.) (CIK).
4104 * Returns 0 for success, errors for failure.
4105 */
4106int cik_irq_set(struct radeon_device *rdev)
4107{
4108 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
4109 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
4110 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4111 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4112 u32 grbm_int_cntl = 0;
 4113	u32 dma_cntl, dma_cntl1;
 4114
4115 if (!rdev->irq.installed) {
4116 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4117 return -EINVAL;
4118 }
4119 /* don't enable anything if the ih is disabled */
4120 if (!rdev->ih.enabled) {
4121 cik_disable_interrupts(rdev);
4122 /* force the active interrupt state to all disabled */
4123 cik_disable_interrupt_state(rdev);
4124 return 0;
4125 }
4126
4127 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4128 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4129 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4130 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4131 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4132 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4133
 4134	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4135 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4136
 4137	/* enable CP interrupts on all rings */
4138 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4139 DRM_DEBUG("cik_irq_set: sw int gfx\n");
4140 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4141 }
4142 /* TODO: compute queues! */
4143 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
4144
 4145	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4146 DRM_DEBUG("cik_irq_set: sw int dma\n");
4147 dma_cntl |= TRAP_ENABLE;
4148 }
4149
4150 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4151 DRM_DEBUG("cik_irq_set: sw int dma1\n");
4152 dma_cntl1 |= TRAP_ENABLE;
4153 }
4154
 4155	if (rdev->irq.crtc_vblank_int[0] ||
4156 atomic_read(&rdev->irq.pflip[0])) {
4157 DRM_DEBUG("cik_irq_set: vblank 0\n");
4158 crtc1 |= VBLANK_INTERRUPT_MASK;
4159 }
4160 if (rdev->irq.crtc_vblank_int[1] ||
4161 atomic_read(&rdev->irq.pflip[1])) {
4162 DRM_DEBUG("cik_irq_set: vblank 1\n");
4163 crtc2 |= VBLANK_INTERRUPT_MASK;
4164 }
4165 if (rdev->irq.crtc_vblank_int[2] ||
4166 atomic_read(&rdev->irq.pflip[2])) {
4167 DRM_DEBUG("cik_irq_set: vblank 2\n");
4168 crtc3 |= VBLANK_INTERRUPT_MASK;
4169 }
4170 if (rdev->irq.crtc_vblank_int[3] ||
4171 atomic_read(&rdev->irq.pflip[3])) {
4172 DRM_DEBUG("cik_irq_set: vblank 3\n");
4173 crtc4 |= VBLANK_INTERRUPT_MASK;
4174 }
4175 if (rdev->irq.crtc_vblank_int[4] ||
4176 atomic_read(&rdev->irq.pflip[4])) {
4177 DRM_DEBUG("cik_irq_set: vblank 4\n");
4178 crtc5 |= VBLANK_INTERRUPT_MASK;
4179 }
4180 if (rdev->irq.crtc_vblank_int[5] ||
4181 atomic_read(&rdev->irq.pflip[5])) {
4182 DRM_DEBUG("cik_irq_set: vblank 5\n");
4183 crtc6 |= VBLANK_INTERRUPT_MASK;
4184 }
4185 if (rdev->irq.hpd[0]) {
4186 DRM_DEBUG("cik_irq_set: hpd 1\n");
4187 hpd1 |= DC_HPDx_INT_EN;
4188 }
4189 if (rdev->irq.hpd[1]) {
4190 DRM_DEBUG("cik_irq_set: hpd 2\n");
4191 hpd2 |= DC_HPDx_INT_EN;
4192 }
4193 if (rdev->irq.hpd[2]) {
4194 DRM_DEBUG("cik_irq_set: hpd 3\n");
4195 hpd3 |= DC_HPDx_INT_EN;
4196 }
4197 if (rdev->irq.hpd[3]) {
4198 DRM_DEBUG("cik_irq_set: hpd 4\n");
4199 hpd4 |= DC_HPDx_INT_EN;
4200 }
4201 if (rdev->irq.hpd[4]) {
4202 DRM_DEBUG("cik_irq_set: hpd 5\n");
4203 hpd5 |= DC_HPDx_INT_EN;
4204 }
4205 if (rdev->irq.hpd[5]) {
4206 DRM_DEBUG("cik_irq_set: hpd 6\n");
4207 hpd6 |= DC_HPDx_INT_EN;
4208 }
4209
4210 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4211
 4212	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4213 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4214
 4215	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4216
4217 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4218 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4219 if (rdev->num_crtc >= 4) {
4220 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4221 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4222 }
4223 if (rdev->num_crtc >= 6) {
4224 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4225 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4226 }
4227
4228 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4229 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4230 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4231 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4232 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4233 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4234
4235 return 0;
4236}
4237
4238/**
4239 * cik_irq_ack - ack interrupt sources
4240 *
4241 * @rdev: radeon_device pointer
4242 *
4243 * Ack interrupt sources on the GPU (vblanks, hpd,
 4244 * etc.) (CIK). Certain interrupt sources are sw
4245 * generated and do not require an explicit ack.
4246 */
4247static inline void cik_irq_ack(struct radeon_device *rdev)
4248{
4249 u32 tmp;
4250
4251 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4252 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4253 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4254 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4255 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4256 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4257 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4258
4259 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4260 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4261 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4262 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4263 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4264 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4265 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4266 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4267
4268 if (rdev->num_crtc >= 4) {
4269 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4270 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4271 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4272 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4273 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4274 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4275 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4276 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4277 }
4278
4279 if (rdev->num_crtc >= 6) {
4280 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4281 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4282 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4283 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4284 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4285 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4286 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4287 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4288 }
4289
4290 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4291 tmp = RREG32(DC_HPD1_INT_CONTROL);
4292 tmp |= DC_HPDx_INT_ACK;
4293 WREG32(DC_HPD1_INT_CONTROL, tmp);
4294 }
4295 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4296 tmp = RREG32(DC_HPD2_INT_CONTROL);
4297 tmp |= DC_HPDx_INT_ACK;
4298 WREG32(DC_HPD2_INT_CONTROL, tmp);
4299 }
4300 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4301 tmp = RREG32(DC_HPD3_INT_CONTROL);
4302 tmp |= DC_HPDx_INT_ACK;
4303 WREG32(DC_HPD3_INT_CONTROL, tmp);
4304 }
4305 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4306 tmp = RREG32(DC_HPD4_INT_CONTROL);
4307 tmp |= DC_HPDx_INT_ACK;
4308 WREG32(DC_HPD4_INT_CONTROL, tmp);
4309 }
4310 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4311 tmp = RREG32(DC_HPD5_INT_CONTROL);
4312 tmp |= DC_HPDx_INT_ACK;
4313 WREG32(DC_HPD5_INT_CONTROL, tmp);
4314 }
4315 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
 4316		tmp = RREG32(DC_HPD6_INT_CONTROL);
4317 tmp |= DC_HPDx_INT_ACK;
4318 WREG32(DC_HPD6_INT_CONTROL, tmp);
4319 }
4320}
4321
4322/**
4323 * cik_irq_disable - disable interrupts
4324 *
4325 * @rdev: radeon_device pointer
4326 *
4327 * Disable interrupts on the hw (CIK).
4328 */
4329static void cik_irq_disable(struct radeon_device *rdev)
4330{
4331 cik_disable_interrupts(rdev);
4332 /* Wait and acknowledge irq */
4333 mdelay(1);
4334 cik_irq_ack(rdev);
4335 cik_disable_interrupt_state(rdev);
4336}
4337
4338/**
 4339 * cik_irq_suspend - disable interrupts for suspend
4340 *
4341 * @rdev: radeon_device pointer
4342 *
4343 * Disable interrupts and stop the RLC (CIK).
4344 * Used for suspend.
4345 */
4346static void cik_irq_suspend(struct radeon_device *rdev)
4347{
4348 cik_irq_disable(rdev);
4349 cik_rlc_stop(rdev);
4350}
4351
4352/**
4353 * cik_irq_fini - tear down interrupt support
4354 *
4355 * @rdev: radeon_device pointer
4356 *
4357 * Disable interrupts on the hw and free the IH ring
4358 * buffer (CIK).
4359 * Used for driver unload.
4360 */
4361static void cik_irq_fini(struct radeon_device *rdev)
4362{
4363 cik_irq_suspend(rdev);
4364 r600_ih_ring_fini(rdev);
4365}
4366
4367/**
4368 * cik_get_ih_wptr - get the IH ring buffer wptr
4369 *
4370 * @rdev: radeon_device pointer
4371 *
4372 * Get the IH ring buffer wptr from either the register
4373 * or the writeback memory buffer (CIK). Also check for
4374 * ring buffer overflow and deal with it.
4375 * Used by cik_irq_process().
4376 * Returns the value of the wptr.
4377 */
4378static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
4379{
4380 u32 wptr, tmp;
4381
4382 if (rdev->wb.enabled)
4383 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4384 else
4385 wptr = RREG32(IH_RB_WPTR);
4386
4387 if (wptr & RB_OVERFLOW) {
 4388		/* When a ring buffer overflow happens, start parsing interrupts
 4389		 * from the last not overwritten vector (wptr + 16). Hopefully
 4390		 * this should allow us to catch up.
4391 */
4392 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4393 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4394 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4395 tmp = RREG32(IH_RB_CNTL);
4396 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4397 WREG32(IH_RB_CNTL, tmp);
4398 }
4399 return (wptr & rdev->ih.ptr_mask);
4400}
4401
4402/* CIK IV Ring
4403 * Each IV ring entry is 128 bits:
4404 * [7:0] - interrupt source id
4405 * [31:8] - reserved
4406 * [59:32] - interrupt source data
4407 * [63:60] - reserved
 4408 * [71:64] - RINGID
4409 * CP:
4410 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 4411 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
4412 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
4413 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
4414 * PIPE_ID - ME0 0=3D
4415 * - ME1&2 compute dispatcher (4 pipes each)
 4416 * SDMA:
4417 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
4418 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
4419 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 4420 * [79:72] - VMID
4421 * [95:80] - PASID
4422 * [127:96] - reserved
4423 */
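/*
 * Illustrative sketch, not driver code: how the first three little-endian
 * dwords of a 128-bit IV ring entry map to the fields listed above.  The
 * struct and helper are local to this example; cik_irq_process() below
 * only pulls out src_id, src_data and ring_id.
 */
struct cik_iv_entry_example {
	u32 src_id;	/* [7:0] */
	u32 src_data;	/* [59:32] */
	u32 ring_id;	/* [71:64] */
	u32 vm_id;	/* [79:72] */
	u32 pasid;	/* [95:80] */
};

static void cik_iv_entry_example_decode(const __le32 *dw,
					struct cik_iv_entry_example *e)
{
	e->src_id   = le32_to_cpu(dw[0]) & 0xff;
	e->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	e->ring_id  = le32_to_cpu(dw[2]) & 0xff;
	e->vm_id    = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	e->pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff;
}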
4424/**
4425 * cik_irq_process - interrupt handler
4426 *
4427 * @rdev: radeon_device pointer
4428 *
 4429 * Interrupt handler (CIK). Walk the IH ring,
4430 * ack interrupts and schedule work to handle
4431 * interrupt events.
4432 * Returns irq process return code.
4433 */
4434int cik_irq_process(struct radeon_device *rdev)
4435{
4436 u32 wptr;
4437 u32 rptr;
4438 u32 src_id, src_data, ring_id;
4439 u8 me_id, pipe_id, queue_id;
4440 u32 ring_index;
4441 bool queue_hotplug = false;
4442 bool queue_reset = false;
4443
4444 if (!rdev->ih.enabled || rdev->shutdown)
4445 return IRQ_NONE;
4446
4447 wptr = cik_get_ih_wptr(rdev);
4448
4449restart_ih:
4450 /* is somebody else already processing irqs? */
4451 if (atomic_xchg(&rdev->ih.lock, 1))
4452 return IRQ_NONE;
4453
4454 rptr = rdev->ih.rptr;
4455 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4456
4457 /* Order reading of wptr vs. reading of IH ring data */
4458 rmb();
4459
4460 /* display interrupts */
4461 cik_irq_ack(rdev);
4462
4463 while (rptr != wptr) {
4464 /* wptr/rptr are in bytes! */
4465 ring_index = rptr / 4;
4466 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4467 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4468 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
 4469
4470 switch (src_id) {
4471 case 1: /* D1 vblank/vline */
4472 switch (src_data) {
4473 case 0: /* D1 vblank */
4474 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
4475 if (rdev->irq.crtc_vblank_int[0]) {
4476 drm_handle_vblank(rdev->ddev, 0);
4477 rdev->pm.vblank_sync = true;
4478 wake_up(&rdev->irq.vblank_queue);
4479 }
4480 if (atomic_read(&rdev->irq.pflip[0]))
4481 radeon_crtc_handle_flip(rdev, 0);
4482 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4483 DRM_DEBUG("IH: D1 vblank\n");
4484 }
4485 break;
4486 case 1: /* D1 vline */
4487 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
4488 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4489 DRM_DEBUG("IH: D1 vline\n");
4490 }
4491 break;
4492 default:
4493 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4494 break;
4495 }
4496 break;
4497 case 2: /* D2 vblank/vline */
4498 switch (src_data) {
4499 case 0: /* D2 vblank */
4500 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4501 if (rdev->irq.crtc_vblank_int[1]) {
4502 drm_handle_vblank(rdev->ddev, 1);
4503 rdev->pm.vblank_sync = true;
4504 wake_up(&rdev->irq.vblank_queue);
4505 }
4506 if (atomic_read(&rdev->irq.pflip[1]))
4507 radeon_crtc_handle_flip(rdev, 1);
4508 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4509 DRM_DEBUG("IH: D2 vblank\n");
4510 }
4511 break;
4512 case 1: /* D2 vline */
4513 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4514 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4515 DRM_DEBUG("IH: D2 vline\n");
4516 }
4517 break;
4518 default:
4519 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4520 break;
4521 }
4522 break;
4523 case 3: /* D3 vblank/vline */
4524 switch (src_data) {
4525 case 0: /* D3 vblank */
4526 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4527 if (rdev->irq.crtc_vblank_int[2]) {
4528 drm_handle_vblank(rdev->ddev, 2);
4529 rdev->pm.vblank_sync = true;
4530 wake_up(&rdev->irq.vblank_queue);
4531 }
4532 if (atomic_read(&rdev->irq.pflip[2]))
4533 radeon_crtc_handle_flip(rdev, 2);
4534 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4535 DRM_DEBUG("IH: D3 vblank\n");
4536 }
4537 break;
4538 case 1: /* D3 vline */
4539 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4540 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4541 DRM_DEBUG("IH: D3 vline\n");
4542 }
4543 break;
4544 default:
4545 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4546 break;
4547 }
4548 break;
4549 case 4: /* D4 vblank/vline */
4550 switch (src_data) {
4551 case 0: /* D4 vblank */
4552 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4553 if (rdev->irq.crtc_vblank_int[3]) {
4554 drm_handle_vblank(rdev->ddev, 3);
4555 rdev->pm.vblank_sync = true;
4556 wake_up(&rdev->irq.vblank_queue);
4557 }
4558 if (atomic_read(&rdev->irq.pflip[3]))
4559 radeon_crtc_handle_flip(rdev, 3);
4560 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4561 DRM_DEBUG("IH: D4 vblank\n");
4562 }
4563 break;
4564 case 1: /* D4 vline */
4565 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4566 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4567 DRM_DEBUG("IH: D4 vline\n");
4568 }
4569 break;
4570 default:
4571 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4572 break;
4573 }
4574 break;
4575 case 5: /* D5 vblank/vline */
4576 switch (src_data) {
4577 case 0: /* D5 vblank */
4578 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4579 if (rdev->irq.crtc_vblank_int[4]) {
4580 drm_handle_vblank(rdev->ddev, 4);
4581 rdev->pm.vblank_sync = true;
4582 wake_up(&rdev->irq.vblank_queue);
4583 }
4584 if (atomic_read(&rdev->irq.pflip[4]))
4585 radeon_crtc_handle_flip(rdev, 4);
4586 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4587 DRM_DEBUG("IH: D5 vblank\n");
4588 }
4589 break;
4590 case 1: /* D5 vline */
4591 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4592 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4593 DRM_DEBUG("IH: D5 vline\n");
4594 }
4595 break;
4596 default:
4597 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4598 break;
4599 }
4600 break;
4601 case 6: /* D6 vblank/vline */
4602 switch (src_data) {
4603 case 0: /* D6 vblank */
4604 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4605 if (rdev->irq.crtc_vblank_int[5]) {
4606 drm_handle_vblank(rdev->ddev, 5);
4607 rdev->pm.vblank_sync = true;
4608 wake_up(&rdev->irq.vblank_queue);
4609 }
4610 if (atomic_read(&rdev->irq.pflip[5]))
4611 radeon_crtc_handle_flip(rdev, 5);
4612 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4613 DRM_DEBUG("IH: D6 vblank\n");
4614 }
4615 break;
4616 case 1: /* D6 vline */
4617 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4618 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4619 DRM_DEBUG("IH: D6 vline\n");
4620 }
4621 break;
4622 default:
4623 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4624 break;
4625 }
4626 break;
4627 case 42: /* HPD hotplug */
4628 switch (src_data) {
4629 case 0:
4630 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4631 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
4632 queue_hotplug = true;
4633 DRM_DEBUG("IH: HPD1\n");
4634 }
4635 break;
4636 case 1:
4637 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4638 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4639 queue_hotplug = true;
4640 DRM_DEBUG("IH: HPD2\n");
4641 }
4642 break;
4643 case 2:
4644 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4645 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4646 queue_hotplug = true;
4647 DRM_DEBUG("IH: HPD3\n");
4648 }
4649 break;
4650 case 3:
4651 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4652 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4653 queue_hotplug = true;
4654 DRM_DEBUG("IH: HPD4\n");
4655 }
4656 break;
4657 case 4:
4658 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4659 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4660 queue_hotplug = true;
4661 DRM_DEBUG("IH: HPD5\n");
4662 }
4663 break;
4664 case 5:
4665 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4666 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4667 queue_hotplug = true;
4668 DRM_DEBUG("IH: HPD6\n");
4669 }
4670 break;
4671 default:
4672 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4673 break;
4674 }
4675 break;
 4676		case 146:
4677 case 147:
4678 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4679 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4680 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4681 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4682 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4683 /* reset addr and status */
4684 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4685 break;
 4686		case 176: /* GFX RB CP_INT */
4687 case 177: /* GFX IB CP_INT */
4688 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4689 break;
4690 case 181: /* CP EOP event */
4691 DRM_DEBUG("IH: CP EOP\n");
 4692			/* XXX check the bitfield order! */
4693 me_id = (ring_id & 0x60) >> 5;
4694 pipe_id = (ring_id & 0x18) >> 3;
4695 queue_id = (ring_id & 0x7) >> 0;
 4696			switch (me_id) {
4697 case 0:
4698 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4699 break;
4700 case 1:
4701 /* XXX compute */
4702 break;
4703 case 2:
4704 /* XXX compute */
4705 break;
4706 }
4707 break;
4708 case 184: /* CP Privileged reg access */
4709 DRM_ERROR("Illegal register access in command stream\n");
4710 /* XXX check the bitfield order! */
4711 me_id = (ring_id & 0x60) >> 5;
4712 pipe_id = (ring_id & 0x18) >> 3;
4713 queue_id = (ring_id & 0x7) >> 0;
4714 switch (me_id) {
4715 case 0:
4716 /* This results in a full GPU reset, but all we need to do is soft
4717 * reset the CP for gfx
4718 */
4719 queue_reset = true;
4720 break;
4721 case 1:
4722 /* XXX compute */
4723 break;
4724 case 2:
4725 /* XXX compute */
4726 break;
4727 }
4728 break;
4729 case 185: /* CP Privileged inst */
4730 DRM_ERROR("Illegal instruction in command stream\n");
 4731			/* XXX check the bitfield order! */
4732 me_id = (ring_id & 0x60) >> 5;
4733 pipe_id = (ring_id & 0x18) >> 3;
4734 queue_id = (ring_id & 0x7) >> 0;
 4735			switch (me_id) {
4736 case 0:
4737 /* This results in a full GPU reset, but all we need to do is soft
4738 * reset the CP for gfx
4739 */
4740 queue_reset = true;
4741 break;
4742 case 1:
4743 /* XXX compute */
4744 break;
4745 case 2:
4746 /* XXX compute */
4747 break;
4748 }
4749 break;
 4750		case 224: /* SDMA trap event */
4751 /* XXX check the bitfield order! */
4752 me_id = (ring_id & 0x3) >> 0;
4753 queue_id = (ring_id & 0xc) >> 2;
4754 DRM_DEBUG("IH: SDMA trap\n");
4755 switch (me_id) {
4756 case 0:
4757 switch (queue_id) {
4758 case 0:
4759 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4760 break;
4761 case 1:
4762 /* XXX compute */
4763 break;
4764 case 2:
4765 /* XXX compute */
4766 break;
4767 }
4768 break;
4769 case 1:
4770 switch (queue_id) {
4771 case 0:
4772 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4773 break;
4774 case 1:
4775 /* XXX compute */
4776 break;
4777 case 2:
4778 /* XXX compute */
4779 break;
4780 }
4781 break;
4782 }
4783 break;
4784 case 241: /* SDMA Privileged inst */
4785 case 247: /* SDMA Privileged inst */
4786 DRM_ERROR("Illegal instruction in SDMA command stream\n");
4787 /* XXX check the bitfield order! */
4788 me_id = (ring_id & 0x3) >> 0;
4789 queue_id = (ring_id & 0xc) >> 2;
4790 switch (me_id) {
4791 case 0:
4792 switch (queue_id) {
4793 case 0:
4794 queue_reset = true;
4795 break;
4796 case 1:
4797 /* XXX compute */
4798 queue_reset = true;
4799 break;
4800 case 2:
4801 /* XXX compute */
4802 queue_reset = true;
4803 break;
4804 }
4805 break;
4806 case 1:
4807 switch (queue_id) {
4808 case 0:
4809 queue_reset = true;
4810 break;
4811 case 1:
4812 /* XXX compute */
4813 queue_reset = true;
4814 break;
4815 case 2:
4816 /* XXX compute */
4817 queue_reset = true;
4818 break;
4819 }
4820 break;
4821 }
4822 break;
 4823		case 233: /* GUI IDLE */
4824 DRM_DEBUG("IH: GUI idle\n");
4825 break;
4826 default:
4827 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4828 break;
4829 }
4830
4831 /* wptr/rptr are in bytes! */
4832 rptr += 16;
4833 rptr &= rdev->ih.ptr_mask;
4834 }
4835 if (queue_hotplug)
4836 schedule_work(&rdev->hotplug_work);
4837 if (queue_reset)
4838 schedule_work(&rdev->reset_work);
4839 rdev->ih.rptr = rptr;
4840 WREG32(IH_RB_RPTR, rdev->ih.rptr);
4841 atomic_set(&rdev->ih.lock, 0);
4842
4843 /* make sure wptr hasn't changed while processing */
4844 wptr = cik_get_ih_wptr(rdev);
4845 if (wptr != rptr)
4846 goto restart_ih;
4847
4848 return IRQ_HANDLED;
4849}
 4850
4851/*
4852 * startup/shutdown callbacks
4853 */
4854/**
4855 * cik_startup - program the asic to a functional state
4856 *
4857 * @rdev: radeon_device pointer
4858 *
4859 * Programs the asic to a functional state (CIK).
4860 * Called by cik_init() and cik_resume().
4861 * Returns 0 for success, error for failure.
4862 */
4863static int cik_startup(struct radeon_device *rdev)
4864{
4865 struct radeon_ring *ring;
4866 int r;
4867
4868 if (rdev->flags & RADEON_IS_IGP) {
4869 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4870 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
4871 r = cik_init_microcode(rdev);
4872 if (r) {
4873 DRM_ERROR("Failed to load firmware!\n");
4874 return r;
4875 }
4876 }
4877 } else {
4878 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4879 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
4880 !rdev->mc_fw) {
4881 r = cik_init_microcode(rdev);
4882 if (r) {
4883 DRM_ERROR("Failed to load firmware!\n");
4884 return r;
4885 }
4886 }
4887
4888 r = ci_mc_load_microcode(rdev);
4889 if (r) {
4890 DRM_ERROR("Failed to load MC firmware!\n");
4891 return r;
4892 }
4893 }
4894
4895 r = r600_vram_scratch_init(rdev);
4896 if (r)
4897 return r;
4898
4899 cik_mc_program(rdev);
4900 r = cik_pcie_gart_enable(rdev);
4901 if (r)
4902 return r;
4903 cik_gpu_init(rdev);
4904
4905 /* allocate rlc buffers */
4906 r = si_rlc_init(rdev);
4907 if (r) {
4908 DRM_ERROR("Failed to init rlc BOs!\n");
4909 return r;
4910 }
4911
4912 /* allocate wb buffer */
4913 r = radeon_wb_init(rdev);
4914 if (r)
4915 return r;
4916
4917 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4918 if (r) {
4919 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4920 return r;
4921 }
4922
4923 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4924 if (r) {
4925 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4926 return r;
4927 }
4928
4929 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4930 if (r) {
4931 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4932 return r;
4933 }
4934
 4935	r = cik_uvd_resume(rdev);
4936 if (!r) {
4937 r = radeon_fence_driver_start_ring(rdev,
4938 R600_RING_TYPE_UVD_INDEX);
4939 if (r)
4940 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
4941 }
4942 if (r)
4943 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
4944
 4945	/* Enable IRQ */
4946 if (!rdev->irq.installed) {
4947 r = radeon_irq_kms_init(rdev);
4948 if (r)
4949 return r;
4950 }
4951
4952 r = cik_irq_init(rdev);
4953 if (r) {
4954 DRM_ERROR("radeon: IH init failed (%d).\n", r);
4955 radeon_irq_kms_fini(rdev);
4956 return r;
4957 }
4958 cik_irq_set(rdev);
4959
4960 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4961 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
4962 CP_RB0_RPTR, CP_RB0_WPTR,
4963 0, 0xfffff, RADEON_CP_PACKET2);
4964 if (r)
4965 return r;
4966
4967 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4968 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4969 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
4970 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
4971 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4972 if (r)
4973 return r;
4974
4975 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4976 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4977 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
4978 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
4979 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4980 if (r)
4981 return r;
4982
4983 r = cik_cp_resume(rdev);
4984 if (r)
4985 return r;
4986
4987 r = cik_sdma_resume(rdev);
4988 if (r)
4989 return r;
4990
 4991	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
4992 if (ring->ring_size) {
4993 r = radeon_ring_init(rdev, ring, ring->ring_size,
4994 R600_WB_UVD_RPTR_OFFSET,
4995 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
4996 0, 0xfffff, RADEON_CP_PACKET2);
4997 if (!r)
4998 r = r600_uvd_init(rdev);
4999 if (r)
5000 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5001 }
5002
 5003	r = radeon_ib_pool_init(rdev);
5004 if (r) {
5005 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5006 return r;
5007 }
5008
5009 r = radeon_vm_manager_init(rdev);
5010 if (r) {
5011 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5012 return r;
5013 }
5014
5015 return 0;
5016}
5017
5018/**
5019 * cik_resume - resume the asic to a functional state
5020 *
5021 * @rdev: radeon_device pointer
5022 *
5023 * Programs the asic to a functional state (CIK).
5024 * Called at resume.
5025 * Returns 0 for success, error for failure.
5026 */
5027int cik_resume(struct radeon_device *rdev)
5028{
5029 int r;
5030
5031 /* post card */
5032 atom_asic_init(rdev->mode_info.atom_context);
5033
5034 rdev->accel_working = true;
5035 r = cik_startup(rdev);
5036 if (r) {
5037 DRM_ERROR("cik startup failed on resume\n");
5038 rdev->accel_working = false;
5039 return r;
5040 }
5041
5042 return r;
5043
5044}
5045
5046/**
5047 * cik_suspend - suspend the asic
5048 *
5049 * @rdev: radeon_device pointer
5050 *
5051 * Bring the chip into a state suitable for suspend (CIK).
5052 * Called at suspend.
5053 * Returns 0 for success.
5054 */
5055int cik_suspend(struct radeon_device *rdev)
5056{
5057 radeon_vm_manager_fini(rdev);
5058 cik_cp_enable(rdev, false);
5059 cik_sdma_enable(rdev, false);
 5060	r600_uvd_rbc_stop(rdev);
 5061	radeon_uvd_suspend(rdev);
 5062	cik_irq_suspend(rdev);
5063 radeon_wb_disable(rdev);
5064 cik_pcie_gart_disable(rdev);
5065 return 0;
5066}
5067
 5068/* The plan is to move initialization into this function and use
 5069 * helper functions so that radeon_device_init does pretty much
 5070 * nothing more than calling the asic specific functions. This
 5071 * should also allow us to remove a bunch of callback functions
 5072 * like vram_info.
5073 */
5074/**
5075 * cik_init - asic specific driver and hw init
5076 *
5077 * @rdev: radeon_device pointer
5078 *
5079 * Setup asic specific driver variables and program the hw
5080 * to a functional state (CIK).
5081 * Called at driver startup.
5082 * Returns 0 for success, errors for failure.
5083 */
5084int cik_init(struct radeon_device *rdev)
5085{
5086 struct radeon_ring *ring;
5087 int r;
5088
5089 /* Read BIOS */
5090 if (!radeon_get_bios(rdev)) {
5091 if (ASIC_IS_AVIVO(rdev))
5092 return -EINVAL;
5093 }
5094 /* Must be an ATOMBIOS */
5095 if (!rdev->is_atom_bios) {
 5096		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
5097 return -EINVAL;
5098 }
5099 r = radeon_atombios_init(rdev);
5100 if (r)
5101 return r;
5102
5103 /* Post card if necessary */
5104 if (!radeon_card_posted(rdev)) {
5105 if (!rdev->bios) {
5106 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5107 return -EINVAL;
5108 }
5109 DRM_INFO("GPU not posted. posting now...\n");
5110 atom_asic_init(rdev->mode_info.atom_context);
5111 }
5112 /* Initialize scratch registers */
5113 cik_scratch_init(rdev);
5114 /* Initialize surface registers */
5115 radeon_surface_init(rdev);
5116 /* Initialize clocks */
5117 radeon_get_clock_info(rdev->ddev);
5118
5119 /* Fence driver */
5120 r = radeon_fence_driver_init(rdev);
5121 if (r)
5122 return r;
5123
5124 /* initialize memory controller */
5125 r = cik_mc_init(rdev);
5126 if (r)
5127 return r;
5128 /* Memory manager */
5129 r = radeon_bo_init(rdev);
5130 if (r)
5131 return r;
5132
5133 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5134 ring->ring_obj = NULL;
5135 r600_ring_init(rdev, ring, 1024 * 1024);
5136
5137 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5138 ring->ring_obj = NULL;
5139 r600_ring_init(rdev, ring, 256 * 1024);
5140
5141 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5142 ring->ring_obj = NULL;
5143 r600_ring_init(rdev, ring, 256 * 1024);
5144
 5145	r = radeon_uvd_init(rdev);
5146 if (!r) {
5147 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5148 ring->ring_obj = NULL;
5149 r600_ring_init(rdev, ring, 4096);
5150 }
5151
 5152	rdev->ih.ring_obj = NULL;
5153 r600_ih_ring_init(rdev, 64 * 1024);
5154
5155 r = r600_pcie_gart_init(rdev);
5156 if (r)
5157 return r;
5158
5159 rdev->accel_working = true;
5160 r = cik_startup(rdev);
5161 if (r) {
5162 dev_err(rdev->dev, "disabling GPU acceleration\n");
5163 cik_cp_fini(rdev);
5164 cik_sdma_fini(rdev);
5165 cik_irq_fini(rdev);
5166 si_rlc_fini(rdev);
5167 radeon_wb_fini(rdev);
5168 radeon_ib_pool_fini(rdev);
5169 radeon_vm_manager_fini(rdev);
5170 radeon_irq_kms_fini(rdev);
5171 cik_pcie_gart_fini(rdev);
5172 rdev->accel_working = false;
5173 }
5174
5175 /* Don't start up if the MC ucode is missing.
5176 * The default clocks and voltages before the MC ucode
 5177 * is loaded are not sufficient for advanced operations.
5178 */
5179 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
5180 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5181 return -EINVAL;
5182 }
5183
5184 return 0;
5185}
5186
5187/**
5188 * cik_fini - asic specific driver and hw fini
5189 *
5190 * @rdev: radeon_device pointer
5191 *
5192 * Tear down the asic specific driver variables and program the hw
5193 * to an idle state (CIK).
5194 * Called at driver unload.
5195 */
5196void cik_fini(struct radeon_device *rdev)
5197{
5198 cik_cp_fini(rdev);
5199 cik_sdma_fini(rdev);
5200 cik_irq_fini(rdev);
5201 si_rlc_fini(rdev);
5202 radeon_wb_fini(rdev);
5203 radeon_vm_manager_fini(rdev);
5204 radeon_ib_pool_fini(rdev);
5205 radeon_irq_kms_fini(rdev);
 5206	radeon_uvd_fini(rdev);
 5207	cik_pcie_gart_fini(rdev);
5208 r600_vram_scratch_fini(rdev);
5209 radeon_gem_fini(rdev);
5210 radeon_fence_driver_fini(rdev);
5211 radeon_bo_fini(rdev);
5212 radeon_atombios_fini(rdev);
5213 kfree(rdev->bios);
5214 rdev->bios = NULL;
5215}
 5216
5217/* display watermark setup */
5218/**
5219 * dce8_line_buffer_adjust - Set up the line buffer
5220 *
5221 * @rdev: radeon_device pointer
5222 * @radeon_crtc: the selected display controller
5223 * @mode: the current display mode on the selected display
5224 * controller
5225 *
5226 * Setup up the line buffer allocation for
5227 * the selected display controller (CIK).
5228 * Returns the line buffer size in pixels.
5229 */
5230static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5231 struct radeon_crtc *radeon_crtc,
5232 struct drm_display_mode *mode)
5233{
5234 u32 tmp;
5235
5236 /*
5237 * Line Buffer Setup
 5238	 * There are 6 line buffers, one for each display controller.
 5239	 * There are 3 partitions per LB. Select the number of partitions
 5240	 * to enable based on the display width. For display widths larger
 5241	 * than 4096, you need to use 2 display controllers and combine
5242 * them using the stereo blender.
5243 */
5244 if (radeon_crtc->base.enabled && mode) {
5245 if (mode->crtc_hdisplay < 1920)
5246 tmp = 1;
5247 else if (mode->crtc_hdisplay < 2560)
5248 tmp = 2;
5249 else if (mode->crtc_hdisplay < 4096)
5250 tmp = 0;
5251 else {
5252 DRM_DEBUG_KMS("Mode too big for LB!\n");
5253 tmp = 0;
5254 }
5255 } else
5256 tmp = 1;
5257
5258 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5259 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5260
5261 if (radeon_crtc->base.enabled && mode) {
5262 switch (tmp) {
5263 case 0:
5264 default:
5265 return 4096 * 2;
5266 case 1:
5267 return 1920 * 2;
5268 case 2:
5269 return 2560 * 2;
5270 }
5271 }
5272
5273 /* controller not enabled, so no lb used */
5274 return 0;
5275}
5276
5277/**
5278 * cik_get_number_of_dram_channels - get the number of dram channels
5279 *
5280 * @rdev: radeon_device pointer
5281 *
5282 * Look up the number of video ram channels (CIK).
5283 * Used for display watermark bandwidth calculations
5284 * Returns the number of dram channels
5285 */
5286static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5287{
5288 u32 tmp = RREG32(MC_SHARED_CHMAP);
5289
5290 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5291 case 0:
5292 default:
5293 return 1;
5294 case 1:
5295 return 2;
5296 case 2:
5297 return 4;
5298 case 3:
5299 return 8;
5300 case 4:
5301 return 3;
5302 case 5:
5303 return 6;
5304 case 6:
5305 return 10;
5306 case 7:
5307 return 12;
5308 case 8:
5309 return 16;
5310 }
5311}
5312
5313struct dce8_wm_params {
5314 u32 dram_channels; /* number of dram channels */
5315 u32 yclk; /* bandwidth per dram data pin in kHz */
5316 u32 sclk; /* engine clock in kHz */
5317 u32 disp_clk; /* display clock in kHz */
5318 u32 src_width; /* viewport width */
5319 u32 active_time; /* active display time in ns */
5320 u32 blank_time; /* blank time in ns */
5321 bool interlaced; /* mode is interlaced */
5322 fixed20_12 vsc; /* vertical scale ratio */
5323 u32 num_heads; /* number of active crtcs */
5324 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
5325 u32 lb_size; /* line buffer allocated to pipe */
5326 u32 vtaps; /* vertical scaler taps */
5327};
5328
5329/**
5330 * dce8_dram_bandwidth - get the dram bandwidth
5331 *
5332 * @wm: watermark calculation data
5333 *
5334 * Calculate the raw dram bandwidth (CIK).
5335 * Used for display watermark bandwidth calculations
5336 * Returns the dram bandwidth in MBytes/s
5337 */
5338static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
5339{
5340 /* Calculate raw DRAM Bandwidth */
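	/*
	 * i.e. bandwidth (MB/s) ~= (yclk / 1000) * (dram_channels * 4) * 0.7,
	 * with yclk in kHz and a fixed 70% DRAM efficiency factor.
	 */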
5341 fixed20_12 dram_efficiency; /* 0.7 */
5342 fixed20_12 yclk, dram_channels, bandwidth;
5343 fixed20_12 a;
5344
5345 a.full = dfixed_const(1000);
5346 yclk.full = dfixed_const(wm->yclk);
5347 yclk.full = dfixed_div(yclk, a);
5348 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5349 a.full = dfixed_const(10);
5350 dram_efficiency.full = dfixed_const(7);
5351 dram_efficiency.full = dfixed_div(dram_efficiency, a);
5352 bandwidth.full = dfixed_mul(dram_channels, yclk);
5353 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
5354
5355 return dfixed_trunc(bandwidth);
5356}
5357
5358/**
5359 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
5360 *
5361 * @wm: watermark calculation data
5362 *
5363 * Calculate the dram bandwidth used for display (CIK).
5364 * Used for display watermark bandwidth calculations
5365 * Returns the dram bandwidth for display in MBytes/s
5366 */
5367static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5368{
5369 /* Calculate DRAM Bandwidth and the part allocated to display. */
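	/*
	 * Same formula as dce8_dram_bandwidth(), but scaled by the worst
	 * case 30% display allocation instead of the 70% efficiency factor.
	 */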
5370 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
5371 fixed20_12 yclk, dram_channels, bandwidth;
5372 fixed20_12 a;
5373
5374 a.full = dfixed_const(1000);
5375 yclk.full = dfixed_const(wm->yclk);
5376 yclk.full = dfixed_div(yclk, a);
5377 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5378 a.full = dfixed_const(10);
5379	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
5380 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
5381 bandwidth.full = dfixed_mul(dram_channels, yclk);
5382 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
5383
5384 return dfixed_trunc(bandwidth);
5385}
5386
5387/**
5388 * dce8_data_return_bandwidth - get the data return bandwidth
5389 *
5390 * @wm: watermark calculation data
5391 *
5392 * Calculate the data return bandwidth used for display (CIK).
5393 * Used for display watermark bandwidth calculations
5394 * Returns the data return bandwidth in MBytes/s
5395 */
5396static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
5397{
5398 /* Calculate the display Data return Bandwidth */
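	/* i.e. bandwidth (MB/s) ~= 32 bytes/clk * (sclk / 1000) * 0.8 efficiency */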
5399 fixed20_12 return_efficiency; /* 0.8 */
5400 fixed20_12 sclk, bandwidth;
5401 fixed20_12 a;
5402
5403 a.full = dfixed_const(1000);
5404 sclk.full = dfixed_const(wm->sclk);
5405 sclk.full = dfixed_div(sclk, a);
5406 a.full = dfixed_const(10);
5407 return_efficiency.full = dfixed_const(8);
5408 return_efficiency.full = dfixed_div(return_efficiency, a);
5409 a.full = dfixed_const(32);
5410 bandwidth.full = dfixed_mul(a, sclk);
5411 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
5412
5413 return dfixed_trunc(bandwidth);
5414}
5415
5416/**
5417 * dce8_dmif_request_bandwidth - get the dmif bandwidth
5418 *
5419 * @wm: watermark calculation data
5420 *
5421 * Calculate the dmif bandwidth used for display (CIK).
5422 * Used for display watermark bandwidth calculations
5423 * Returns the dmif bandwidth in MBytes/s
5424 */
5425static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
5426{
5427 /* Calculate the DMIF Request Bandwidth */
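	/* i.e. bandwidth (MB/s) ~= 32 bytes/clk * (disp_clk / 1000) * 0.8 efficiency */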
5428 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
5429 fixed20_12 disp_clk, bandwidth;
5430 fixed20_12 a, b;
5431
5432 a.full = dfixed_const(1000);
5433 disp_clk.full = dfixed_const(wm->disp_clk);
5434 disp_clk.full = dfixed_div(disp_clk, a);
5435 a.full = dfixed_const(32);
5436 b.full = dfixed_mul(a, disp_clk);
5437
5438 a.full = dfixed_const(10);
5439 disp_clk_request_efficiency.full = dfixed_const(8);
5440 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
5441
5442 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
5443
5444 return dfixed_trunc(bandwidth);
5445}
5446
5447/**
5448 * dce8_available_bandwidth - get the min available bandwidth
5449 *
5450 * @wm: watermark calculation data
5451 *
5452 * Calculate the min available bandwidth used for display (CIK).
5453 * Used for display watermark bandwidth calculations
5454 * Returns the min available bandwidth in MBytes/s
5455 */
5456static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
5457{
5458	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
5459 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
5460 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
5461 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
5462
5463 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
5464}
5465
5466/**
5467 * dce8_average_bandwidth - get the average available bandwidth
5468 *
5469 * @wm: watermark calculation data
5470 *
5471 * Calculate the average available bandwidth used for display (CIK).
5472 * Used for display watermark bandwidth calculations
5473 * Returns the average available bandwidth in MBytes/s
5474 */
5475static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
5476{
5477 /* Calculate the display mode Average Bandwidth
5478 * DisplayMode should contain the source and destination dimensions,
5479 * timing, etc.
5480 */
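	/*
	 * i.e. bandwidth (MB/s) ~= (src_width * bytes_per_pixel * vsc) /
	 * line_time(us): the bytes fetched per line divided by the time
	 * available to fetch them.
	 */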
5481 fixed20_12 bpp;
5482 fixed20_12 line_time;
5483 fixed20_12 src_width;
5484 fixed20_12 bandwidth;
5485 fixed20_12 a;
5486
5487 a.full = dfixed_const(1000);
5488 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
5489 line_time.full = dfixed_div(line_time, a);
5490 bpp.full = dfixed_const(wm->bytes_per_pixel);
5491 src_width.full = dfixed_const(wm->src_width);
5492 bandwidth.full = dfixed_mul(src_width, bpp);
5493 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
5494 bandwidth.full = dfixed_div(bandwidth, line_time);
5495
5496 return dfixed_trunc(bandwidth);
5497}
5498
5499/**
5500 * dce8_latency_watermark - get the latency watermark
5501 *
5502 * @wm: watermark calculation data
5503 *
5504 * Calculate the latency watermark (CIK).
5505 * Used for display watermark bandwidth calculations
5506 * Returns the latency watermark in ns
5507 */
5508static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
5509{
5510 /* First calculate the latency in ns */
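	/*
	 * latency = MC latency + DC pipe latency + the time needed for the
	 * other heads' worst case chunk and cursor line pair returns.
	 */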
5511 u32 mc_latency = 2000; /* 2000 ns. */
5512 u32 available_bandwidth = dce8_available_bandwidth(wm);
5513 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
5514 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
5515 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
5516 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
5517 (wm->num_heads * cursor_line_pair_return_time);
5518 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
5519 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
5520 u32 tmp, dmif_size = 12288;
5521 fixed20_12 a, b, c;
5522
5523 if (wm->num_heads == 0)
5524 return 0;
5525
5526 a.full = dfixed_const(2);
5527 b.full = dfixed_const(1);
5528 if ((wm->vsc.full > a.full) ||
5529 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
5530 (wm->vtaps >= 5) ||
5531 ((wm->vsc.full >= a.full) && wm->interlaced))
5532 max_src_lines_per_dst_line = 4;
5533 else
5534 max_src_lines_per_dst_line = 2;
5535
5536 a.full = dfixed_const(available_bandwidth);
5537 b.full = dfixed_const(wm->num_heads);
5538 a.full = dfixed_div(a, b);
5539
5540 b.full = dfixed_const(mc_latency + 512);
5541 c.full = dfixed_const(wm->disp_clk);
5542 b.full = dfixed_div(b, c);
5543
5544 c.full = dfixed_const(dmif_size);
5545 b.full = dfixed_div(c, b);
5546
5547 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
5548
5549 b.full = dfixed_const(1000);
5550 c.full = dfixed_const(wm->disp_clk);
5551 b.full = dfixed_div(c, b);
5552 c.full = dfixed_const(wm->bytes_per_pixel);
5553 b.full = dfixed_mul(b, c);
5554
5555 lb_fill_bw = min(tmp, dfixed_trunc(b));
5556
5557 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
5558 b.full = dfixed_const(1000);
5559 c.full = dfixed_const(lb_fill_bw);
5560 b.full = dfixed_div(c, b);
5561 a.full = dfixed_div(a, b);
5562 line_fill_time = dfixed_trunc(a);
5563
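	/*
	 * If filling the allocated line buffer takes longer than the active
	 * display period, extend the watermark by the difference.
	 */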
5564 if (line_fill_time < wm->active_time)
5565 return latency;
5566 else
5567 return latency + (line_fill_time - wm->active_time);
5568
5569}
5570
5571/**
5572 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
5573 * average bandwidth and the dram bandwidth available for display
5574 *
5575 * @wm: watermark calculation data
5576 *
5577 * Check if the display average bandwidth fits in the display
5578 * dram bandwidth (CIK).
5579 * Used for display watermark bandwidth calculations
5580 * Returns true if the display fits, false if not.
5581 */
5582static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5583{
5584 if (dce8_average_bandwidth(wm) <=
5585 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
5586 return true;
5587 else
5588 return false;
5589}
5590
5591/**
5592 * dce8_average_bandwidth_vs_available_bandwidth - check
5593 * average and available bandwidth
5594 *
5595 * @wm: watermark calculation data
5596 *
5597 * Check if the display average bandwidth fits in the display
5598 * available bandwidth (CIK).
5599 * Used for display watermark bandwidth calculations
5600 * Returns true if the display fits, false if not.
5601 */
5602static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
5603{
5604 if (dce8_average_bandwidth(wm) <=
5605 (dce8_available_bandwidth(wm) / wm->num_heads))
5606 return true;
5607 else
5608 return false;
5609}
5610
5611/**
5612 * dce8_check_latency_hiding - check latency hiding
5613 *
5614 * @wm: watermark calculation data
5615 *
5616 * Check latency hiding (CIK).
5617 * Used for display watermark bandwidth calculations
5618 * Returns true if the display fits, false if not.
5619 */
5620static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
5621{
5622 u32 lb_partitions = wm->lb_size / wm->src_width;
5623 u32 line_time = wm->active_time + wm->blank_time;
5624 u32 latency_tolerant_lines;
5625 u32 latency_hiding;
5626 fixed20_12 a;
5627
5628 a.full = dfixed_const(1);
5629 if (wm->vsc.full > a.full)
5630 latency_tolerant_lines = 1;
5631 else {
5632 if (lb_partitions <= (wm->vtaps + 1))
5633 latency_tolerant_lines = 1;
5634 else
5635 latency_tolerant_lines = 2;
5636 }
5637
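	/*
	 * The latency hiding window is the time covered by the tolerated
	 * number of lines plus the blank period; the latency watermark
	 * has to fit within it.
	 */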
5638 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
5639
5640 if (dce8_latency_watermark(wm) <= latency_hiding)
5641 return true;
5642 else
5643 return false;
5644}
5645
5646/**
5647 * dce8_program_watermarks - program display watermarks
5648 *
5649 * @rdev: radeon_device pointer
5650 * @radeon_crtc: the selected display controller
5651 * @lb_size: line buffer size
5652 * @num_heads: number of display controllers in use
5653 *
5654 * Calculate and program the display watermarks for the
5655 * selected display controller (CIK).
5656 */
5657static void dce8_program_watermarks(struct radeon_device *rdev,
5658 struct radeon_crtc *radeon_crtc,
5659 u32 lb_size, u32 num_heads)
5660{
5661 struct drm_display_mode *mode = &radeon_crtc->base.mode;
5662 struct dce8_wm_params wm;
5663 u32 pixel_period;
5664 u32 line_time = 0;
5665 u32 latency_watermark_a = 0, latency_watermark_b = 0;
5666 u32 tmp, wm_mask;
5667
5668 if (radeon_crtc->base.enabled && num_heads && mode) {
5669 pixel_period = 1000000 / (u32)mode->clock;
5670 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
5671
5672 wm.yclk = rdev->pm.current_mclk * 10;
5673 wm.sclk = rdev->pm.current_sclk * 10;
5674 wm.disp_clk = mode->clock;
5675 wm.src_width = mode->crtc_hdisplay;
5676 wm.active_time = mode->crtc_hdisplay * pixel_period;
5677 wm.blank_time = line_time - wm.active_time;
5678 wm.interlaced = false;
5679 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
5680 wm.interlaced = true;
5681 wm.vsc = radeon_crtc->vsc;
5682 wm.vtaps = 1;
5683 if (radeon_crtc->rmx_type != RMX_OFF)
5684 wm.vtaps = 2;
5685 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
5686 wm.lb_size = lb_size;
5687 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
5688 wm.num_heads = num_heads;
5689
5690 /* set for high clocks */
5691 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
5692 /* set for low clocks */
5693 /* wm.yclk = low clk; wm.sclk = low clk */
5694 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
5695
5696 /* possibly force display priority to high */
5697 /* should really do this at mode validation time... */
5698 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
5699 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
5700 !dce8_check_latency_hiding(&wm) ||
5701 (rdev->disp_priority == 2)) {
5702 DRM_DEBUG_KMS("force priority to high\n");
5703 }
5704 }
5705
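	/*
	 * Program watermark set A with the high clock values and set B with
	 * the low clock values (currently computed with the same clocks,
	 * see above), selecting each set via the watermark mask before
	 * writing and restoring the original selection afterwards.
	 */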
5706 /* select wm A */
5707 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5708 tmp = wm_mask;
5709 tmp &= ~LATENCY_WATERMARK_MASK(3);
5710 tmp |= LATENCY_WATERMARK_MASK(1);
5711 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5712 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5713 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
5714 LATENCY_HIGH_WATERMARK(line_time)));
5715 /* select wm B */
5716 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5717 tmp &= ~LATENCY_WATERMARK_MASK(3);
5718 tmp |= LATENCY_WATERMARK_MASK(2);
5719 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5720 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5721 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
5722 LATENCY_HIGH_WATERMARK(line_time)));
5723 /* restore original selection */
5724 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
5725}
5726
5727/**
5728 * dce8_bandwidth_update - program display watermarks
5729 *
5730 * @rdev: radeon_device pointer
5731 *
5732 * Calculate and program the display watermarks and line
5733 * buffer allocation (CIK).
5734 */
5735void dce8_bandwidth_update(struct radeon_device *rdev)
5736{
5737 struct drm_display_mode *mode = NULL;
5738 u32 num_heads = 0, lb_size;
5739 int i;
5740
5741 radeon_update_display_priority(rdev);
5742
5743 for (i = 0; i < rdev->num_crtc; i++) {
5744 if (rdev->mode_info.crtcs[i]->base.enabled)
5745 num_heads++;
5746 }
5747 for (i = 0; i < rdev->num_crtc; i++) {
5748 mode = &rdev->mode_info.crtcs[i]->base.mode;
5749 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
5750 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
5751 }
5752}
Alex Deucher44fa3462012-12-18 22:17:00 -05005753
5754/**
5755 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
5756 *
5757 * @rdev: radeon_device pointer
5758 *
5759 * Fetches a GPU clock counter snapshot (CIK).
5760 * Returns the 64 bit clock counter snapshot.
5761 */
5762uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
5763{
5764 uint64_t clock;
5765
5766 mutex_lock(&rdev->gpu_clock_mutex);
5767 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5768 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5769 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5770 mutex_unlock(&rdev->gpu_clock_mutex);
5771 return clock;
5772}
5773
Christian König87167bb2013-04-09 13:39:21 -04005774static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
5775 u32 cntl_reg, u32 status_reg)
5776{
5777 int r, i;
5778 struct atom_clock_dividers dividers;
5779 uint32_t tmp;
5780
5781 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
5782 clock, false, &dividers);
5783 if (r)
5784 return r;
5785
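	/*
	 * Program the post divider returned by the ATOM tables into the
	 * clock control register, then poll the status bit for up to
	 * ~1 second (100 * 10ms) for the new clock to take effect.
	 */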
5786 tmp = RREG32_SMC(cntl_reg);
5787 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
5788 tmp |= dividers.post_divider;
5789 WREG32_SMC(cntl_reg, tmp);
5790
5791 for (i = 0; i < 100; i++) {
5792 if (RREG32_SMC(status_reg) & DCLK_STATUS)
5793 break;
5794 mdelay(10);
5795 }
5796 if (i == 100)
5797 return -ETIMEDOUT;
5798
5799 return 0;
5800}
5801
5802int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
5803{
5804 int r = 0;
5805
5806 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
5807 if (r)
5808 return r;
5809
5810 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
5811 return r;
5812}
5813
5814int cik_uvd_resume(struct radeon_device *rdev)
5815{
5816 uint64_t addr;
5817 uint32_t size;
5818 int r;
5819
5820 r = radeon_uvd_resume(rdev);
5821 if (r)
5822 return r;
5823
5824	/* program the VCPU memory controller bits 0-27 */
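	/*
	 * VCPU address space layout, in 8-byte units (hence the >> 3):
	 * firmware image first, then the stack, then the heap.  The upper
	 * address bits (28-31 and 32-39) are programmed into the LMI
	 * extension registers below.
	 */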
5825 addr = rdev->uvd.gpu_addr >> 3;
5826 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
5827 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
5828 WREG32(UVD_VCPU_CACHE_SIZE0, size);
5829
5830 addr += size;
5831 size = RADEON_UVD_STACK_SIZE >> 3;
5832 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
5833 WREG32(UVD_VCPU_CACHE_SIZE1, size);
5834
5835 addr += size;
5836 size = RADEON_UVD_HEAP_SIZE >> 3;
5837 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
5838 WREG32(UVD_VCPU_CACHE_SIZE2, size);
5839
5840 /* bits 28-31 */
5841 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
5842 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
5843
5844 /* bits 32-39 */
5845 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
5846 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
5847
5848 return 0;
5849}