1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/platform_device.h>
26#include <linux/slab.h>
27#include <linux/module.h>
28#include "drmP.h"
29#include "radeon.h"
30#include "radeon_asic.h"
31#include "cikd.h"
32#include "atom.h"
33#include "cik_blit_shaders.h"
34
35/* GFX */
36#define CIK_PFP_UCODE_SIZE 2144
37#define CIK_ME_UCODE_SIZE 2144
38#define CIK_CE_UCODE_SIZE 2144
39/* compute */
40#define CIK_MEC_UCODE_SIZE 4192
41/* interrupts */
42#define BONAIRE_RLC_UCODE_SIZE 2048
43#define KB_RLC_UCODE_SIZE 2560
44#define KV_RLC_UCODE_SIZE 2560
45/* gddr controller */
46#define CIK_MC_UCODE_SIZE 7866
47/* sdma */
48#define CIK_SDMA_UCODE_SIZE 1050
49#define CIK_SDMA_UCODE_VERSION 64
50
51MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
57MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
58MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
59MODULE_FIRMWARE("radeon/KAVERI_me.bin");
60MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
61MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
62MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
63MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
64MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
65MODULE_FIRMWARE("radeon/KABINI_me.bin");
66MODULE_FIRMWARE("radeon/KABINI_ce.bin");
67MODULE_FIRMWARE("radeon/KABINI_mec.bin");
68MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
69MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
70
71extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72extern void r600_ih_ring_fini(struct radeon_device *rdev);
73extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
74extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
75extern bool evergreen_is_display_hung(struct radeon_device *rdev);
76extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
77extern void si_rlc_fini(struct radeon_device *rdev);
78extern int si_rlc_init(struct radeon_device *rdev);
79static void cik_rlc_stop(struct radeon_device *rdev);
80
81#define BONAIRE_IO_MC_REGS_SIZE 36
82
83static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
84{
85 {0x00000070, 0x04400000},
86 {0x00000071, 0x80c01803},
87 {0x00000072, 0x00004004},
88 {0x00000073, 0x00000100},
89 {0x00000074, 0x00ff0000},
90 {0x00000075, 0x34000000},
91 {0x00000076, 0x08000014},
92 {0x00000077, 0x00cc08ec},
93 {0x00000078, 0x00000400},
94 {0x00000079, 0x00000000},
95 {0x0000007a, 0x04090000},
96 {0x0000007c, 0x00000000},
97 {0x0000007e, 0x4408a8e8},
98 {0x0000007f, 0x00000304},
99 {0x00000080, 0x00000000},
100 {0x00000082, 0x00000001},
101 {0x00000083, 0x00000002},
102 {0x00000084, 0xf3e4f400},
103 {0x00000085, 0x052024e3},
104 {0x00000087, 0x00000000},
105 {0x00000088, 0x01000000},
106 {0x0000008a, 0x1c0a0000},
107 {0x0000008b, 0xff010000},
108 {0x0000008d, 0xffffefff},
109 {0x0000008e, 0xfff3efff},
110 {0x0000008f, 0xfff3efbf},
111 {0x00000092, 0xf7ffffff},
112 {0x00000093, 0xffffff7f},
113 {0x00000095, 0x00101101},
114 {0x00000096, 0x00000fff},
115 {0x00000097, 0x00116fff},
116 {0x00000098, 0x60010000},
117 {0x00000099, 0x10010000},
118 {0x0000009a, 0x00006000},
119 {0x0000009b, 0x00001000},
120 {0x0000009f, 0x00b48000}
121};
122
123/* ucode loading */
124/**
125 * ci_mc_load_microcode - load MC ucode into the hw
126 *
127 * @rdev: radeon_device pointer
128 *
129 * Load the GDDR MC ucode into the hw (CIK).
130 * Returns 0 on success, error on failure.
131 */
132static int ci_mc_load_microcode(struct radeon_device *rdev)
133{
134 const __be32 *fw_data;
135 u32 running, blackout = 0;
136 u32 *io_mc_regs;
137 int i, ucode_size, regs_size;
138
139 if (!rdev->mc_fw)
140 return -EINVAL;
141
142 switch (rdev->family) {
143 case CHIP_BONAIRE:
144 default:
145 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
146 ucode_size = CIK_MC_UCODE_SIZE;
147 regs_size = BONAIRE_IO_MC_REGS_SIZE;
148 break;
149 }
150
151 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
152
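	/* only load the ucode if the MC firmware engine is not already running */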
153 if (running == 0) {
154 if (running) {
155 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
156 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
157 }
158
159 /* reset the engine and set to writable */
160 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
161 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
162
163 /* load mc io regs */
164 for (i = 0; i < regs_size; i++) {
165 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
166 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
167 }
168 /* load the MC ucode */
169 fw_data = (const __be32 *)rdev->mc_fw->data;
170 for (i = 0; i < ucode_size; i++)
171 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
172
173 /* put the engine back into the active state */
174 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
175 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
176 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
177
178 /* wait for training to complete */
179 for (i = 0; i < rdev->usec_timeout; i++) {
180 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
181 break;
182 udelay(1);
183 }
184 for (i = 0; i < rdev->usec_timeout; i++) {
185 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
186 break;
187 udelay(1);
188 }
189
190 if (running)
191 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
192 }
193
194 return 0;
195}
196
197/**
198 * cik_init_microcode - load ucode images from disk
199 *
200 * @rdev: radeon_device pointer
201 *
202 * Use the firmware interface to load the ucode images into
203 * the driver (not loaded into hw).
204 * Returns 0 on success, error on failure.
205 */
206static int cik_init_microcode(struct radeon_device *rdev)
207{
208 struct platform_device *pdev;
209 const char *chip_name;
210 size_t pfp_req_size, me_req_size, ce_req_size,
211 mec_req_size, rlc_req_size, mc_req_size,
212 sdma_req_size;
213 char fw_name[30];
214 int err;
215
216 DRM_DEBUG("\n");
217
218 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
219 err = IS_ERR(pdev);
220 if (err) {
221 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
222 return -EINVAL;
223 }
224
225 switch (rdev->family) {
226 case CHIP_BONAIRE:
227 chip_name = "BONAIRE";
228 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
229 me_req_size = CIK_ME_UCODE_SIZE * 4;
230 ce_req_size = CIK_CE_UCODE_SIZE * 4;
231 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
232 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
233 mc_req_size = CIK_MC_UCODE_SIZE * 4;
234 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
235 break;
236 case CHIP_KAVERI:
237 chip_name = "KAVERI";
238 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
239 me_req_size = CIK_ME_UCODE_SIZE * 4;
240 ce_req_size = CIK_CE_UCODE_SIZE * 4;
241 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
242 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
243 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
244 break;
245 case CHIP_KABINI:
246 chip_name = "KABINI";
247 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
248 me_req_size = CIK_ME_UCODE_SIZE * 4;
249 ce_req_size = CIK_CE_UCODE_SIZE * 4;
250 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
251 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
252 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
253 break;
254 default: BUG();
255 }
256
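	/* each image is fetched with request_firmware() and must match the expected size exactly */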
257 DRM_INFO("Loading %s Microcode\n", chip_name);
258
259 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
260 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
261 if (err)
262 goto out;
263 if (rdev->pfp_fw->size != pfp_req_size) {
264 printk(KERN_ERR
265 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
266 rdev->pfp_fw->size, fw_name);
267 err = -EINVAL;
268 goto out;
269 }
270
271 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
272 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
273 if (err)
274 goto out;
275 if (rdev->me_fw->size != me_req_size) {
276 printk(KERN_ERR
277 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
278 rdev->me_fw->size, fw_name);
279 err = -EINVAL;
280 }
281
282 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
283 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
284 if (err)
285 goto out;
286 if (rdev->ce_fw->size != ce_req_size) {
287 printk(KERN_ERR
288 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
289 rdev->ce_fw->size, fw_name);
290 err = -EINVAL;
291 }
292
293 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
294 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
295 if (err)
296 goto out;
297 if (rdev->mec_fw->size != mec_req_size) {
298 printk(KERN_ERR
299 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
300 rdev->mec_fw->size, fw_name);
301 err = -EINVAL;
302 }
303
304 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
305 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
306 if (err)
307 goto out;
308 if (rdev->rlc_fw->size != rlc_req_size) {
309 printk(KERN_ERR
310 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
311 rdev->rlc_fw->size, fw_name);
312 err = -EINVAL;
313 }
314
315 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
316 err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
317 if (err)
318 goto out;
319 if (rdev->sdma_fw->size != sdma_req_size) {
320 printk(KERN_ERR
321 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
322 rdev->sdma_fw->size, fw_name);
323 err = -EINVAL;
324 }
325
326 /* No MC ucode on APUs */
327 if (!(rdev->flags & RADEON_IS_IGP)) {
328 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
329 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
330 if (err)
331 goto out;
332 if (rdev->mc_fw->size != mc_req_size) {
333 printk(KERN_ERR
334 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
335 rdev->mc_fw->size, fw_name);
336 err = -EINVAL;
337 }
338 }
339
340out:
341 platform_device_unregister(pdev);
342
343 if (err) {
344 if (err != -EINVAL)
345 printk(KERN_ERR
346 "cik_cp: Failed to load firmware \"%s\"\n",
347 fw_name);
348 release_firmware(rdev->pfp_fw);
349 rdev->pfp_fw = NULL;
350 release_firmware(rdev->me_fw);
351 rdev->me_fw = NULL;
352 release_firmware(rdev->ce_fw);
353 rdev->ce_fw = NULL;
354 release_firmware(rdev->rlc_fw);
355 rdev->rlc_fw = NULL;
356 release_firmware(rdev->mc_fw);
357 rdev->mc_fw = NULL;
358 }
359 return err;
360}
361
362/*
363 * Core functions
364 */
365/**
366 * cik_tiling_mode_table_init - init the hw tiling table
367 *
368 * @rdev: radeon_device pointer
369 *
370 * Starting with SI, the tiling setup is done globally in a
371 * set of 32 tiling modes. Rather than selecting each set of
372 * parameters per surface as on older asics, we just select
373 * which index in the tiling table we want to use, and the
374 * surface uses those parameters (CIK).
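 * The 32 tile modes are written to GB_TILE_MODE0-31 and the 16
 * macrotile modes to GB_MACROTILE_MODE0-15 below.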
375 */
376static void cik_tiling_mode_table_init(struct radeon_device *rdev)
377{
378 const u32 num_tile_mode_states = 32;
379 const u32 num_secondary_tile_mode_states = 16;
380 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
381 u32 num_pipe_configs;
382 u32 num_rbs = rdev->config.cik.max_backends_per_se *
383 rdev->config.cik.max_shader_engines;
384
385 switch (rdev->config.cik.mem_row_size_in_kb) {
386 case 1:
387 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
388 break;
389 case 2:
390 default:
391 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
392 break;
393 case 4:
394 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
395 break;
396 }
397
398 num_pipe_configs = rdev->config.cik.max_tile_pipes;
399 if (num_pipe_configs > 8)
400 num_pipe_configs = 8; /* ??? */
401
402 if (num_pipe_configs == 8) {
403 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
404 switch (reg_offset) {
405 case 0:
406 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
407 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
408 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
409 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
410 break;
411 case 1:
412 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
413 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
414 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
416 break;
417 case 2:
418 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
420 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
421 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
422 break;
423 case 3:
424 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
425 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
426 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
427 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
428 break;
429 case 4:
430 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
432 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
433 TILE_SPLIT(split_equal_to_row_size));
434 break;
435 case 5:
436 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
437 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
438 break;
439 case 6:
440 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
442 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
443 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
444 break;
445 case 7:
446 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
447 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
448 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
449 TILE_SPLIT(split_equal_to_row_size));
450 break;
451 case 8:
452 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
453 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
454 break;
455 case 9:
456 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
457 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
458 break;
459 case 10:
460 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
461 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
462 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
464 break;
465 case 11:
466 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
467 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
468 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
470 break;
471 case 12:
472 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
473 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
474 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
476 break;
477 case 13:
478 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
479 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
480 break;
481 case 14:
482 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
483 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
484 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
486 break;
487 case 16:
488 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
489 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
490 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
492 break;
493 case 17:
494 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
495 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
496 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
498 break;
499 case 27:
500 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
501 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
502 break;
503 case 28:
504 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
505 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
506 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
508 break;
509 case 29:
510 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
511 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
512 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
513 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
514 break;
515 case 30:
516 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
517 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
520 break;
521 default:
522 gb_tile_moden = 0;
523 break;
524 }
525 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
526 }
527 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
528 switch (reg_offset) {
529 case 0:
530 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
533 NUM_BANKS(ADDR_SURF_16_BANK));
534 break;
535 case 1:
536 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
539 NUM_BANKS(ADDR_SURF_16_BANK));
540 break;
541 case 2:
542 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
543 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
544 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
545 NUM_BANKS(ADDR_SURF_16_BANK));
546 break;
547 case 3:
548 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
551 NUM_BANKS(ADDR_SURF_16_BANK));
552 break;
553 case 4:
554 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
555 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
556 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
557 NUM_BANKS(ADDR_SURF_8_BANK));
558 break;
559 case 5:
560 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
561 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
562 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
563 NUM_BANKS(ADDR_SURF_4_BANK));
564 break;
565 case 6:
566 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
567 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
568 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
569 NUM_BANKS(ADDR_SURF_2_BANK));
570 break;
571 case 8:
572 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
573 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
574 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
575 NUM_BANKS(ADDR_SURF_16_BANK));
576 break;
577 case 9:
578 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
579 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
580 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
581 NUM_BANKS(ADDR_SURF_16_BANK));
582 break;
583 case 10:
584 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
585 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
586 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
587 NUM_BANKS(ADDR_SURF_16_BANK));
588 break;
589 case 11:
590 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
591 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
592 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
593 NUM_BANKS(ADDR_SURF_16_BANK));
594 break;
595 case 12:
596 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
597 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
598 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
599 NUM_BANKS(ADDR_SURF_8_BANK));
600 break;
601 case 13:
602 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
603 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
604 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
605 NUM_BANKS(ADDR_SURF_4_BANK));
606 break;
607 case 14:
608 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
609 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
610 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
611 NUM_BANKS(ADDR_SURF_2_BANK));
612 break;
613 default:
614 gb_tile_moden = 0;
615 break;
616 }
617 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
618 }
619 } else if (num_pipe_configs == 4) {
620 if (num_rbs == 4) {
621 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
622 switch (reg_offset) {
623 case 0:
624 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
625 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
626 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
627 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
628 break;
629 case 1:
630 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
631 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
632 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
633 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
634 break;
635 case 2:
636 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
637 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
638 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
639 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
640 break;
641 case 3:
642 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
643 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
644 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
645 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
646 break;
647 case 4:
648 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
649 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
650 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
651 TILE_SPLIT(split_equal_to_row_size));
652 break;
653 case 5:
654 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
655 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
656 break;
657 case 6:
658 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
659 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
660 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
661 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
662 break;
663 case 7:
664 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
665 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
666 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
667 TILE_SPLIT(split_equal_to_row_size));
668 break;
669 case 8:
670 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
671 PIPE_CONFIG(ADDR_SURF_P4_16x16));
672 break;
673 case 9:
674 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
675 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
676 break;
677 case 10:
678 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
679 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
680 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
682 break;
683 case 11:
684 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
685 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
686 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
687 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
688 break;
689 case 12:
690 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
691 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
692 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
694 break;
695 case 13:
696 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
697 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
698 break;
699 case 14:
700 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
701 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
702 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
704 break;
705 case 16:
706 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
707 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
708 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
710 break;
711 case 17:
712 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
713 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
714 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
715 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
716 break;
717 case 27:
718 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
719 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
720 break;
721 case 28:
722 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
723 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
724 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
725 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
726 break;
727 case 29:
728 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
729 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
730 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
731 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
732 break;
733 case 30:
734 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
735 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
736 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
738 break;
739 default:
740 gb_tile_moden = 0;
741 break;
742 }
743 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
744 }
745 } else if (num_rbs < 4) {
746 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
747 switch (reg_offset) {
748 case 0:
749 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
750 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
751 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
752 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
753 break;
754 case 1:
755 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
756 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
757 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
758 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
759 break;
760 case 2:
761 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
762 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
763 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
764 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
765 break;
766 case 3:
767 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
768 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
769 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
770 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
771 break;
772 case 4:
773 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
774 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
775 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
776 TILE_SPLIT(split_equal_to_row_size));
777 break;
778 case 5:
779 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
780 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
781 break;
782 case 6:
783 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
784 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
785 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
786 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
787 break;
788 case 7:
789 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
790 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
791 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
792 TILE_SPLIT(split_equal_to_row_size));
793 break;
794 case 8:
795 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
796 PIPE_CONFIG(ADDR_SURF_P4_8x16));
797 break;
798 case 9:
799 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
800 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
801 break;
802 case 10:
803 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
804 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
805 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
806 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
807 break;
808 case 11:
809 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
810 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
811 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
812 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
813 break;
814 case 12:
815 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
816 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
817 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
818 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
819 break;
820 case 13:
821 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
822 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
823 break;
824 case 14:
825 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
826 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
827 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
828 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
829 break;
830 case 16:
831 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
832 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
833 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
835 break;
836 case 17:
837 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
838 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
839 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
841 break;
842 case 27:
843 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
844 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
845 break;
846 case 28:
847 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
848 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
849 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
851 break;
852 case 29:
853 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
854 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
855 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
856 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
857 break;
858 case 30:
859 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
860 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
861 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
863 break;
864 default:
865 gb_tile_moden = 0;
866 break;
867 }
868 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
869 }
870 }
871 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
872 switch (reg_offset) {
873 case 0:
874 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
875 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
876 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
877 NUM_BANKS(ADDR_SURF_16_BANK));
878 break;
879 case 1:
880 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
881 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
882 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
883 NUM_BANKS(ADDR_SURF_16_BANK));
884 break;
885 case 2:
886 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
887 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
888 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
889 NUM_BANKS(ADDR_SURF_16_BANK));
890 break;
891 case 3:
892 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
893 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
894 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
895 NUM_BANKS(ADDR_SURF_16_BANK));
896 break;
897 case 4:
898 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
899 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
900 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
901 NUM_BANKS(ADDR_SURF_16_BANK));
902 break;
903 case 5:
904 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
905 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
906 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
907 NUM_BANKS(ADDR_SURF_8_BANK));
908 break;
909 case 6:
910 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
911 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
912 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
913 NUM_BANKS(ADDR_SURF_4_BANK));
914 break;
915 case 8:
916 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
917 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
918 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
919 NUM_BANKS(ADDR_SURF_16_BANK));
920 break;
921 case 9:
922 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
923 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
924 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
925 NUM_BANKS(ADDR_SURF_16_BANK));
926 break;
927 case 10:
928 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
929 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
930 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
931 NUM_BANKS(ADDR_SURF_16_BANK));
932 break;
933 case 11:
934 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
935 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
936 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
937 NUM_BANKS(ADDR_SURF_16_BANK));
938 break;
939 case 12:
940 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
941 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
942 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
943 NUM_BANKS(ADDR_SURF_16_BANK));
944 break;
945 case 13:
946 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
949 NUM_BANKS(ADDR_SURF_8_BANK));
950 break;
951 case 14:
952 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
955 NUM_BANKS(ADDR_SURF_4_BANK));
956 break;
957 default:
958 gb_tile_moden = 0;
959 break;
960 }
961 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
962 }
963 } else if (num_pipe_configs == 2) {
964 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
965 switch (reg_offset) {
966 case 0:
967 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
968 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
969 PIPE_CONFIG(ADDR_SURF_P2) |
970 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
971 break;
972 case 1:
973 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
974 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
975 PIPE_CONFIG(ADDR_SURF_P2) |
976 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
977 break;
978 case 2:
979 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
980 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
981 PIPE_CONFIG(ADDR_SURF_P2) |
982 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
983 break;
984 case 3:
985 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
986 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
987 PIPE_CONFIG(ADDR_SURF_P2) |
988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
989 break;
990 case 4:
991 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
992 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
993 PIPE_CONFIG(ADDR_SURF_P2) |
994 TILE_SPLIT(split_equal_to_row_size));
995 break;
996 case 5:
997 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
998 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
999 break;
1000 case 6:
1001 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1003 PIPE_CONFIG(ADDR_SURF_P2) |
1004 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1005 break;
1006 case 7:
1007 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1008 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1009 PIPE_CONFIG(ADDR_SURF_P2) |
1010 TILE_SPLIT(split_equal_to_row_size));
1011 break;
1012 case 8:
1013 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1014 break;
1015 case 9:
1016 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1017 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1018 break;
1019 case 10:
1020 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1021 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1022 PIPE_CONFIG(ADDR_SURF_P2) |
1023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1024 break;
1025 case 11:
1026 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1027 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1028 PIPE_CONFIG(ADDR_SURF_P2) |
1029 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1030 break;
1031 case 12:
1032 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1033 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1034 PIPE_CONFIG(ADDR_SURF_P2) |
1035 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1036 break;
1037 case 13:
1038 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1039 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1040 break;
1041 case 14:
1042 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1043 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1044 PIPE_CONFIG(ADDR_SURF_P2) |
1045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1046 break;
1047 case 16:
1048 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1049 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1050 PIPE_CONFIG(ADDR_SURF_P2) |
1051 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1052 break;
1053 case 17:
1054 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1055 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1056 PIPE_CONFIG(ADDR_SURF_P2) |
1057 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1058 break;
1059 case 27:
1060 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1061 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1062 break;
1063 case 28:
1064 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1065 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1066 PIPE_CONFIG(ADDR_SURF_P2) |
1067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1068 break;
1069 case 29:
1070 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1071 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1072 PIPE_CONFIG(ADDR_SURF_P2) |
1073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1074 break;
1075 case 30:
1076 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1077 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1078 PIPE_CONFIG(ADDR_SURF_P2) |
1079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1080 break;
1081 default:
1082 gb_tile_moden = 0;
1083 break;
1084 }
1085 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1086 }
1087 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1088 switch (reg_offset) {
1089 case 0:
1090 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1091 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1092 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1093 NUM_BANKS(ADDR_SURF_16_BANK));
1094 break;
1095 case 1:
1096 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1097 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1098 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1099 NUM_BANKS(ADDR_SURF_16_BANK));
1100 break;
1101 case 2:
1102 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1103 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1104 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1105 NUM_BANKS(ADDR_SURF_16_BANK));
1106 break;
1107 case 3:
1108 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1109 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1110 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1111 NUM_BANKS(ADDR_SURF_16_BANK));
1112 break;
1113 case 4:
1114 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1115 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1116 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1117 NUM_BANKS(ADDR_SURF_16_BANK));
1118 break;
1119 case 5:
1120 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1121 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1122 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1123 NUM_BANKS(ADDR_SURF_16_BANK));
1124 break;
1125 case 6:
1126 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1127 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1128 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1129 NUM_BANKS(ADDR_SURF_8_BANK));
1130 break;
1131 case 8:
1132 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1133 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1134 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1135 NUM_BANKS(ADDR_SURF_16_BANK));
1136 break;
1137 case 9:
1138 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1139 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1140 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1141 NUM_BANKS(ADDR_SURF_16_BANK));
1142 break;
1143 case 10:
1144 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1147 NUM_BANKS(ADDR_SURF_16_BANK));
1148 break;
1149 case 11:
1150 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1151 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1152 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1153 NUM_BANKS(ADDR_SURF_16_BANK));
1154 break;
1155 case 12:
1156 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1159 NUM_BANKS(ADDR_SURF_16_BANK));
1160 break;
1161 case 13:
1162 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1165 NUM_BANKS(ADDR_SURF_16_BANK));
1166 break;
1167 case 14:
1168 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1171 NUM_BANKS(ADDR_SURF_8_BANK));
1172 break;
1173 default:
1174 gb_tile_moden = 0;
1175 break;
1176 }
1177 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1178 }
1179 } else
1180 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1181}
1182
1183/**
1184 * cik_select_se_sh - select which SE, SH to address
1185 *
1186 * @rdev: radeon_device pointer
1187 * @se_num: shader engine to address
1188 * @sh_num: sh block to address
1189 *
1190 * Select which SE, SH combinations to address. Certain
1191 * registers are instanced per SE or SH. 0xffffffff means
1192 * broadcast to all SEs or SHs (CIK).
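 *
 * Callers typically select one SE/SH instance, program the instanced
 * registers, then restore broadcast mode by passing 0xffffffff for both
 * (see cik_setup_rb() below).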
1193 */
1194static void cik_select_se_sh(struct radeon_device *rdev,
1195 u32 se_num, u32 sh_num)
1196{
1197 u32 data = INSTANCE_BROADCAST_WRITES;
1198
1199 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1200 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1201 else if (se_num == 0xffffffff)
1202 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1203 else if (sh_num == 0xffffffff)
1204 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1205 else
1206 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1207 WREG32(GRBM_GFX_INDEX, data);
1208}
1209
1210/**
1211 * cik_create_bitmask - create a bitmask
1212 *
1213 * @bit_width: length of the mask
1214 *
1215 * create a variable length bit mask (CIK).
1216 * Returns the bitmask.
1217 */
1218static u32 cik_create_bitmask(u32 bit_width)
1219{
1220 u32 i, mask = 0;
1221
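	/* build a mask of bit_width consecutive set bits, i.e. (1 << bit_width) - 1 */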
1222 for (i = 0; i < bit_width; i++) {
1223 mask <<= 1;
1224 mask |= 1;
1225 }
1226 return mask;
1227}
1228
1229/**
1230 * cik_get_rb_disabled - compute the mask of disabled RBs
1231 *
1232 * @rdev: radeon_device pointer
1233 * @max_rb_num: max RBs (render backends) for the asic
1234 * @se_num: number of SEs (shader engines) for the asic
1235 * @sh_per_se: number of SH blocks per SE for the asic
1236 *
1237 * Calculates the bitmask of disabled RBs (CIK).
1238 * Returns the disabled RB bitmask.
1239 */
1240static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1241 u32 max_rb_num, u32 se_num,
1242 u32 sh_per_se)
1243{
1244 u32 data, mask;
1245
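	/* combine RBs disabled in hardware (CC) with those disabled by the driver (GC_USER) */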
1246 data = RREG32(CC_RB_BACKEND_DISABLE);
1247 if (data & 1)
1248 data &= BACKEND_DISABLE_MASK;
1249 else
1250 data = 0;
1251 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1252
1253 data >>= BACKEND_DISABLE_SHIFT;
1254
1255 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1256
1257 return data & mask;
1258}
1259
1260/**
1261 * cik_setup_rb - setup the RBs on the asic
1262 *
1263 * @rdev: radeon_device pointer
1264 * @se_num: number of SEs (shader engines) for the asic
1265 * @sh_per_se: number of SH blocks per SE for the asic
1266 * @max_rb_num: max RBs (render backends) for the asic
1267 *
1268 * Configures per-SE/SH RB registers (CIK).
1269 */
1270static void cik_setup_rb(struct radeon_device *rdev,
1271 u32 se_num, u32 sh_per_se,
1272 u32 max_rb_num)
1273{
1274 int i, j;
1275 u32 data, mask;
1276 u32 disabled_rbs = 0;
1277 u32 enabled_rbs = 0;
1278
1279 for (i = 0; i < se_num; i++) {
1280 for (j = 0; j < sh_per_se; j++) {
1281 cik_select_se_sh(rdev, i, j);
1282 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1283 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1284 }
1285 }
1286 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1287
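	/* every RB not marked disabled is considered enabled */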
1288 mask = 1;
1289 for (i = 0; i < max_rb_num; i++) {
1290 if (!(disabled_rbs & mask))
1291 enabled_rbs |= mask;
1292 mask <<= 1;
1293 }
1294
1295 for (i = 0; i < se_num; i++) {
1296 cik_select_se_sh(rdev, i, 0xffffffff);
1297 data = 0;
1298 for (j = 0; j < sh_per_se; j++) {
1299 switch (enabled_rbs & 3) {
1300 case 1:
1301 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1302 break;
1303 case 2:
1304 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1305 break;
1306 case 3:
1307 default:
1308 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1309 break;
1310 }
1311 enabled_rbs >>= 2;
1312 }
1313 WREG32(PA_SC_RASTER_CONFIG, data);
1314 }
1315 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1316}
1317
1318/**
1319 * cik_gpu_init - setup the 3D engine
1320 *
1321 * @rdev: radeon_device pointer
1322 *
1323 * Configures the 3D engine and tiling configuration
1324 * registers so that the 3D engine is usable.
1325 */
1326static void cik_gpu_init(struct radeon_device *rdev)
1327{
1328 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1329 u32 mc_shared_chmap, mc_arb_ramcfg;
1330 u32 hdp_host_path_cntl;
1331 u32 tmp;
1332 int i, j;
1333
1334 switch (rdev->family) {
1335 case CHIP_BONAIRE:
1336 rdev->config.cik.max_shader_engines = 2;
1337 rdev->config.cik.max_tile_pipes = 4;
1338 rdev->config.cik.max_cu_per_sh = 7;
1339 rdev->config.cik.max_sh_per_se = 1;
1340 rdev->config.cik.max_backends_per_se = 2;
1341 rdev->config.cik.max_texture_channel_caches = 4;
1342 rdev->config.cik.max_gprs = 256;
1343 rdev->config.cik.max_gs_threads = 32;
1344 rdev->config.cik.max_hw_contexts = 8;
1345
1346 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1347 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1348 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1349 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1350 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1351 break;
1352 case CHIP_KAVERI:
1353 /* TODO */
1354 break;
1355 case CHIP_KABINI:
1356 default:
1357 rdev->config.cik.max_shader_engines = 1;
1358 rdev->config.cik.max_tile_pipes = 2;
1359 rdev->config.cik.max_cu_per_sh = 2;
1360 rdev->config.cik.max_sh_per_se = 1;
1361 rdev->config.cik.max_backends_per_se = 1;
1362 rdev->config.cik.max_texture_channel_caches = 2;
1363 rdev->config.cik.max_gprs = 256;
1364 rdev->config.cik.max_gs_threads = 16;
1365 rdev->config.cik.max_hw_contexts = 8;
1366
1367 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1368 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1369 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1370 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1371 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1372 break;
1373 }
1374
1375 /* Initialize HDP */
1376 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1377 WREG32((0x2c14 + j), 0x00000000);
1378 WREG32((0x2c18 + j), 0x00000000);
1379 WREG32((0x2c1c + j), 0x00000000);
1380 WREG32((0x2c20 + j), 0x00000000);
1381 WREG32((0x2c24 + j), 0x00000000);
1382 }
1383
1384 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1385
1386 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1387
1388 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1389 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1390
1391 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1392 rdev->config.cik.mem_max_burst_length_bytes = 256;
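	/* derive the DRAM row size (in KB) from the column count in MC_ARB_RAMCFG */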
1393 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1394 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1395 if (rdev->config.cik.mem_row_size_in_kb > 4)
1396 rdev->config.cik.mem_row_size_in_kb = 4;
1397 /* XXX use MC settings? */
1398 rdev->config.cik.shader_engine_tile_size = 32;
1399 rdev->config.cik.num_gpus = 1;
1400 rdev->config.cik.multi_gpu_tile_size = 64;
1401
1402 /* fix up row size */
1403 gb_addr_config &= ~ROW_SIZE_MASK;
1404 switch (rdev->config.cik.mem_row_size_in_kb) {
1405 case 1:
1406 default:
1407 gb_addr_config |= ROW_SIZE(0);
1408 break;
1409 case 2:
1410 gb_addr_config |= ROW_SIZE(1);
1411 break;
1412 case 4:
1413 gb_addr_config |= ROW_SIZE(2);
1414 break;
1415 }
1416
1417 /* setup tiling info dword. gb_addr_config is not adequate since it does
1418 * not have bank info, so create a custom tiling dword.
1419 * bits 3:0 num_pipes
1420 * bits 7:4 num_banks
1421 * bits 11:8 group_size
1422 * bits 15:12 row_size
1423 */
1424 rdev->config.cik.tile_config = 0;
1425 switch (rdev->config.cik.num_tile_pipes) {
1426 case 1:
1427 rdev->config.cik.tile_config |= (0 << 0);
1428 break;
1429 case 2:
1430 rdev->config.cik.tile_config |= (1 << 0);
1431 break;
1432 case 4:
1433 rdev->config.cik.tile_config |= (2 << 0);
1434 break;
1435 case 8:
1436 default:
1437 /* XXX what about 12? */
1438 rdev->config.cik.tile_config |= (3 << 0);
1439 break;
1440 }
1441 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1442 rdev->config.cik.tile_config |= 1 << 4;
1443 else
1444 rdev->config.cik.tile_config |= 0 << 4;
1445 rdev->config.cik.tile_config |=
1446 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1447 rdev->config.cik.tile_config |=
1448 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1449
1450 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1451 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1452 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1453 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1454 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1455
1456 cik_tiling_mode_table_init(rdev);
1457
1458 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1459 rdev->config.cik.max_sh_per_se,
1460 rdev->config.cik.max_backends_per_se);
1461
1462 /* set HW defaults for 3D engine */
1463 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1464
1465 WREG32(SX_DEBUG_1, 0x20);
1466
1467 WREG32(TA_CNTL_AUX, 0x00010000);
1468
1469 tmp = RREG32(SPI_CONFIG_CNTL);
1470 tmp |= 0x03000000;
1471 WREG32(SPI_CONFIG_CNTL, tmp);
1472
1473 WREG32(SQ_CONFIG, 1);
1474
1475 WREG32(DB_DEBUG, 0);
1476
1477 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1478 tmp |= 0x00000400;
1479 WREG32(DB_DEBUG2, tmp);
1480
1481 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1482 tmp |= 0x00020200;
1483 WREG32(DB_DEBUG3, tmp);
1484
1485 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1486 tmp |= 0x00018208;
1487 WREG32(CB_HW_CONTROL, tmp);
1488
1489 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1490
1491 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1492 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1493 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1494 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1495
1496 WREG32(VGT_NUM_INSTANCES, 1);
1497
1498 WREG32(CP_PERFMON_CNTL, 0);
1499
1500 WREG32(SQ_CONFIG, 0);
1501
1502 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1503 FORCE_EOV_MAX_REZ_CNT(255)));
1504
1505 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1506 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1507
1508 WREG32(VGT_GS_VERTEX_REUSE, 16);
1509 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1510
1511 tmp = RREG32(HDP_MISC_CNTL);
1512 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1513 WREG32(HDP_MISC_CNTL, tmp);
1514
1515 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1516 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1517
1518 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1519 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1520
1521 udelay(50);
1522}
1523
1524/*
1525 * GPU scratch registers helper functions.
1526 */
1527/**
1528 * cik_scratch_init - setup driver info for CP scratch regs
1529 *
1530 * @rdev: radeon_device pointer
1531 *
1532 * Set up the number and offset of the CP scratch registers.
1533 * NOTE: use of CP scratch registers is a legacy interface and
1534 * is not used by default on newer asics (r6xx+). On newer asics,
1535 * memory buffers are used for fences rather than scratch regs.
1536 */
1537static void cik_scratch_init(struct radeon_device *rdev)
1538{
1539 int i;
1540
1541 rdev->scratch.num_reg = 7;
1542 rdev->scratch.reg_base = SCRATCH_REG0;
1543 for (i = 0; i < rdev->scratch.num_reg; i++) {
1544 rdev->scratch.free[i] = true;
1545 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1546 }
1547}
1548
1549/**
1550 * cik_ring_test - basic gfx ring test
1551 *
1552 * @rdev: radeon_device pointer
1553 * @ring: radeon_ring structure holding ring information
1554 *
1555 * Allocate a scratch register and write to it using the gfx ring (CIK).
1556 * Provides a basic gfx ring test to verify that the ring is working.
1557 * Used by cik_cp_gfx_resume();
1558 * Returns 0 on success, error on failure.
1559 */
1560int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1561{
1562 uint32_t scratch;
1563 uint32_t tmp = 0;
1564 unsigned i;
1565 int r;
1566
1567 r = radeon_scratch_get(rdev, &scratch);
1568 if (r) {
1569 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1570 return r;
1571 }
1572 WREG32(scratch, 0xCAFEDEAD);
1573 r = radeon_ring_lock(rdev, ring, 3);
1574 if (r) {
1575 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1576 radeon_scratch_free(rdev, scratch);
1577 return r;
1578 }
1579 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1580 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1581 radeon_ring_write(ring, 0xDEADBEEF);
1582 radeon_ring_unlock_commit(rdev, ring);
1583 for (i = 0; i < rdev->usec_timeout; i++) {
1584 tmp = RREG32(scratch);
1585 if (tmp == 0xDEADBEEF)
1586 break;
1587 DRM_UDELAY(1);
1588 }
1589 if (i < rdev->usec_timeout) {
1590 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1591 } else {
1592 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1593 ring->idx, scratch, tmp);
1594 r = -EINVAL;
1595 }
1596 radeon_scratch_free(rdev, scratch);
1597 return r;
1598}
1599
1600/**
1601 * cik_fence_ring_emit - emit a fence on the gfx ring
1602 *
1603 * @rdev: radeon_device pointer
1604 * @fence: radeon fence object
1605 *
1606 * Emits a fence sequence number on the gfx ring and flushes
1607 * GPU caches.
1608 */
1609void cik_fence_ring_emit(struct radeon_device *rdev,
1610 struct radeon_fence *fence)
1611{
1612 struct radeon_ring *ring = &rdev->ring[fence->ring];
1613 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1614
1615 /* EVENT_WRITE_EOP - flush caches, send int */
1616 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1617 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1618 EOP_TC_ACTION_EN |
1619 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1620 EVENT_INDEX(5)));
1621 radeon_ring_write(ring, addr & 0xfffffffc);
1622 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1623 radeon_ring_write(ring, fence->seq);
1624 radeon_ring_write(ring, 0);
1625 /* HDP flush */
1626 /* We should be using the new WAIT_REG_MEM special op packet here
1627 * but it causes the CP to hang
1628 */
1629 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1630 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1631 WRITE_DATA_DST_SEL(0)));
1632 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1633 radeon_ring_write(ring, 0);
1634 radeon_ring_write(ring, 0);
1635}
1636
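/* Emit a MEM_SEMAPHORE packet on @ring that either signals or waits on
 * the semaphore at @semaphore->gpu_addr, depending on @emit_wait.
 */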
1637void cik_semaphore_ring_emit(struct radeon_device *rdev,
1638 struct radeon_ring *ring,
1639 struct radeon_semaphore *semaphore,
1640 bool emit_wait)
1641{
1642 uint64_t addr = semaphore->gpu_addr;
1643 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1644
1645 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1646 radeon_ring_write(ring, addr & 0xffffffff);
1647 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1648}
1649
1650/*
1651 * IB stuff
1652 */
1653/**
1654 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1655 *
1656 * @rdev: radeon_device pointer
1657 * @ib: radeon indirect buffer object
1658 *
1659 * Emits a DE (drawing engine) or CE (constant engine) IB
1660 * on the gfx ring. IBs are usually generated by userspace
1661 * acceleration drivers and submitted to the kernel for
1662 * scheduling on the ring. This function schedules the IB
1663 * on the gfx ring for execution by the GPU.
1664 */
1665void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1666{
1667 struct radeon_ring *ring = &rdev->ring[ib->ring];
1668 u32 header, control = INDIRECT_BUFFER_VALID;
1669
1670 if (ib->is_const_ib) {
1671 /* set switch buffer packet before const IB */
1672 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1673 radeon_ring_write(ring, 0);
1674
1675 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1676 } else {
1677 u32 next_rptr;
1678 if (ring->rptr_save_reg) {
1679 next_rptr = ring->wptr + 3 + 4;
1680 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1681 radeon_ring_write(ring, ((ring->rptr_save_reg -
1682 PACKET3_SET_UCONFIG_REG_START) >> 2));
1683 radeon_ring_write(ring, next_rptr);
1684 } else if (rdev->wb.enabled) {
1685 next_rptr = ring->wptr + 5 + 4;
1686 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1687 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1688 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1689 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1690 radeon_ring_write(ring, next_rptr);
1691 }
1692
1693 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1694 }
1695
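	/* control word: IB size in DWs in the low bits, plus the VMID of the
	 * owning VM shifted into bits 31:24 (0, i.e. the kernel, when ib->vm
	 * is NULL).
	 */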
1696 control |= ib->length_dw |
1697 (ib->vm ? (ib->vm->id << 24) : 0);
1698
1699 radeon_ring_write(ring, header);
1700 radeon_ring_write(ring,
1701#ifdef __BIG_ENDIAN
1702 (2 << 0) |
1703#endif
1704 (ib->gpu_addr & 0xFFFFFFFC));
1705 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1706 radeon_ring_write(ring, control);
1707}
1708
1709/**
1710 * cik_ib_test - basic gfx ring IB test
1711 *
1712 * @rdev: radeon_device pointer
1713 * @ring: radeon_ring structure holding ring information
1714 *
1715 * Allocate an IB and execute it on the gfx ring (CIK).
1716 * Provides a basic gfx ring test to verify that IBs are working.
1717 * Returns 0 on success, error on failure.
1718 */
1719int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1720{
1721 struct radeon_ib ib;
1722 uint32_t scratch;
1723 uint32_t tmp = 0;
1724 unsigned i;
1725 int r;
1726
1727 r = radeon_scratch_get(rdev, &scratch);
1728 if (r) {
1729 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1730 return r;
1731 }
1732 WREG32(scratch, 0xCAFEDEAD);
1733 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1734 if (r) {
1735 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
1736 return r;
1737 }
1738 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1739 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1740 ib.ptr[2] = 0xDEADBEEF;
1741 ib.length_dw = 3;
1742 r = radeon_ib_schedule(rdev, &ib, NULL);
1743 if (r) {
1744 radeon_scratch_free(rdev, scratch);
1745 radeon_ib_free(rdev, &ib);
1746 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1747 return r;
1748 }
1749 r = radeon_fence_wait(ib.fence, false);
1750 if (r) {
1751 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
1752 return r;
1753 }
1754 for (i = 0; i < rdev->usec_timeout; i++) {
1755 tmp = RREG32(scratch);
1756 if (tmp == 0xDEADBEEF)
1757 break;
1758 DRM_UDELAY(1);
1759 }
1760 if (i < rdev->usec_timeout) {
1761 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1762 } else {
1763 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1764 scratch, tmp);
1765 r = -EINVAL;
1766 }
1767 radeon_scratch_free(rdev, scratch);
1768 radeon_ib_free(rdev, &ib);
1769 return r;
1770}
1771
1772/*
1773 * CP.
1774 * On CIK, gfx and compute now have independent command processors.
1775 *
1776 * GFX
1777 * Gfx consists of a single ring and can process both gfx jobs and
1778 * compute jobs. The gfx CP consists of three microengines (ME):
1779 * PFP - Pre-Fetch Parser
1780 * ME - Micro Engine
1781 * CE - Constant Engine
1782 * The PFP and ME make up what is considered the Drawing Engine (DE).
1783 * The CE is an asynchronous engine used for updating buffer descriptors
1784 * used by the DE so that they can be loaded into cache in parallel
1785 * while the DE is processing state update packets.
1786 *
1787 * Compute
1788 * The compute CP consists of two microengines (ME):
1789 * MEC1 - Compute MicroEngine 1
1790 * MEC2 - Compute MicroEngine 2
1791 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1792 * The queues are exposed to userspace and are programmed directly
1793 * by the compute runtime.
1794 */
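/* e.g. with both MECs active that works out to 2 MECs * 4 pipes * 8 queues
 * = 64 user-visible compute queues.
 */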
1795/**
1796 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1797 *
1798 * @rdev: radeon_device pointer
1799 * @enable: enable or disable the MEs
1800 *
1801 * Halts or unhalts the gfx MEs.
1802 */
1803static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1804{
1805 if (enable)
1806 WREG32(CP_ME_CNTL, 0);
1807 else {
1808 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1809 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1810 }
1811 udelay(50);
1812}
1813
1814/**
1815 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1816 *
1817 * @rdev: radeon_device pointer
1818 *
1819 * Loads the gfx PFP, ME, and CE ucode.
1820 * Returns 0 for success, -EINVAL if the ucode is not available.
1821 */
1822static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1823{
1824 const __be32 *fw_data;
1825 int i;
1826
1827 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1828 return -EINVAL;
1829
1830 cik_cp_gfx_enable(rdev, false);
1831
1832 /* PFP */
1833 fw_data = (const __be32 *)rdev->pfp_fw->data;
1834 WREG32(CP_PFP_UCODE_ADDR, 0);
1835 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1836 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1837 WREG32(CP_PFP_UCODE_ADDR, 0);
1838
1839 /* CE */
1840 fw_data = (const __be32 *)rdev->ce_fw->data;
1841 WREG32(CP_CE_UCODE_ADDR, 0);
1842 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1843 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1844 WREG32(CP_CE_UCODE_ADDR, 0);
1845
1846 /* ME */
1847 fw_data = (const __be32 *)rdev->me_fw->data;
1848 WREG32(CP_ME_RAM_WADDR, 0);
1849 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1850 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1851 WREG32(CP_ME_RAM_WADDR, 0);
1852
1853 WREG32(CP_PFP_UCODE_ADDR, 0);
1854 WREG32(CP_CE_UCODE_ADDR, 0);
1855 WREG32(CP_ME_RAM_WADDR, 0);
1856 WREG32(CP_ME_RAM_RADDR, 0);
1857 return 0;
1858}
1859
1860/**
1861 * cik_cp_gfx_start - start the gfx ring
1862 *
1863 * @rdev: radeon_device pointer
1864 *
1865 * Enables the ring and loads the clear state context and other
1866 * packets required to init the ring.
1867 * Returns 0 for success, error for failure.
1868 */
1869static int cik_cp_gfx_start(struct radeon_device *rdev)
1870{
1871 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1872 int r, i;
1873
1874 /* init the CP */
1875 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1876 WREG32(CP_ENDIAN_SWAP, 0);
1877 WREG32(CP_DEVICE_ID, 1);
1878
1879 cik_cp_gfx_enable(rdev, true);
1880
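	/* cik_default_size DWs for the clear state below plus 17 DWs for the
	 * packets emitted around it: SET_BASE (4), the two PREAMBLE_CNTL
	 * packets (2 + 2), CONTEXT_CONTROL (3), CLEAR_STATE (2) and
	 * SET_CONTEXT_REG (4).
	 */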
1881 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1882 if (r) {
1883 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1884 return r;
1885 }
1886
1887 /* init the CE partitions. CE only used for gfx on CIK */
1888 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1889 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1890 radeon_ring_write(ring, 0xc000);
1891 radeon_ring_write(ring, 0xc000);
1892
1893 /* setup clear context state */
1894 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1895 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1896
1897 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1898 radeon_ring_write(ring, 0x80000000);
1899 radeon_ring_write(ring, 0x80000000);
1900
1901 for (i = 0; i < cik_default_size; i++)
1902 radeon_ring_write(ring, cik_default_state[i]);
1903
1904 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1905 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1906
1907 /* set clear context state */
1908 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1909 radeon_ring_write(ring, 0);
1910
1911 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1912 radeon_ring_write(ring, 0x00000316);
1913 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1914 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1915
1916 radeon_ring_unlock_commit(rdev, ring);
1917
1918 return 0;
1919}
1920
1921/**
1922 * cik_cp_gfx_fini - stop the gfx ring
1923 *
1924 * @rdev: radeon_device pointer
1925 *
1926 * Stop the gfx ring and tear down the driver ring
1927 * info.
1928 */
1929static void cik_cp_gfx_fini(struct radeon_device *rdev)
1930{
1931 cik_cp_gfx_enable(rdev, false);
1932 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1933}
1934
1935/**
1936 * cik_cp_gfx_resume - setup the gfx ring buffer registers
1937 *
1938 * @rdev: radeon_device pointer
1939 *
1940 * Program the location and size of the gfx ring buffer
1941 * and test it to make sure it's working.
1942 * Returns 0 for success, error for failure.
1943 */
1944static int cik_cp_gfx_resume(struct radeon_device *rdev)
1945{
1946 struct radeon_ring *ring;
1947 u32 tmp;
1948 u32 rb_bufsz;
1949 u64 rb_addr;
1950 int r;
1951
1952 WREG32(CP_SEM_WAIT_TIMER, 0x0);
1953 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1954
1955 /* Set the write pointer delay */
1956 WREG32(CP_RB_WPTR_DELAY, 0);
1957
1958 /* set the RB to use vmid 0 */
1959 WREG32(CP_RB_VMID, 0);
1960
1961 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1962
1963 /* ring 0 - compute and gfx */
1964 /* Set ring buffer size */
1965 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
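	/* CP_RB0_CNTL takes the ring size as log2 of the size in quad-words
	 * (8 bytes) - hence the divide by 8 before drm_order() - and what is
	 * assumed to be the rptr report block size, in the same units, in
	 * bits 15:8.
	 */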
1966 rb_bufsz = drm_order(ring->ring_size / 8);
1967 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1968#ifdef __BIG_ENDIAN
1969 tmp |= BUF_SWAP_32BIT;
1970#endif
1971 WREG32(CP_RB0_CNTL, tmp);
1972
1973 /* Initialize the ring buffer's read and write pointers */
1974 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
1975 ring->wptr = 0;
1976 WREG32(CP_RB0_WPTR, ring->wptr);
1977
1978	/* set the wb address whether it's enabled or not */
1979 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
1980 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
1981
1982 /* scratch register shadowing is no longer supported */
1983 WREG32(SCRATCH_UMSK, 0);
1984
1985 if (!rdev->wb.enabled)
1986 tmp |= RB_NO_UPDATE;
1987
1988 mdelay(1);
1989 WREG32(CP_RB0_CNTL, tmp);
1990
1991 rb_addr = ring->gpu_addr >> 8;
1992 WREG32(CP_RB0_BASE, rb_addr);
1993 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
1994
1995 ring->rptr = RREG32(CP_RB0_RPTR);
1996
1997 /* start the ring */
1998 cik_cp_gfx_start(rdev);
1999 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2000 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2001 if (r) {
2002 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2003 return r;
2004 }
2005 return 0;
2006}
2007
2008/**
2009 * cik_cp_compute_enable - enable/disable the compute CP MEs
2010 *
2011 * @rdev: radeon_device pointer
2012 * @enable: enable or disable the MEs
2013 *
2014 * Halts or unhalts the compute MEs.
2015 */
2016static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2017{
2018 if (enable)
2019 WREG32(CP_MEC_CNTL, 0);
2020 else
2021 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2022 udelay(50);
2023}
2024
2025/**
2026 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2027 *
2028 * @rdev: radeon_device pointer
2029 *
2030 * Loads the compute MEC1&2 ucode.
2031 * Returns 0 for success, -EINVAL if the ucode is not available.
2032 */
2033static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2034{
2035 const __be32 *fw_data;
2036 int i;
2037
2038 if (!rdev->mec_fw)
2039 return -EINVAL;
2040
2041 cik_cp_compute_enable(rdev, false);
2042
2043 /* MEC1 */
2044 fw_data = (const __be32 *)rdev->mec_fw->data;
2045 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2046 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2047 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2048 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2049
2050 if (rdev->family == CHIP_KAVERI) {
2051 /* MEC2 */
2052 fw_data = (const __be32 *)rdev->mec_fw->data;
2053 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2054 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2055 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2056 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2057 }
2058
2059 return 0;
2060}
2061
2062/**
2063 * cik_cp_compute_start - start the compute queues
2064 *
2065 * @rdev: radeon_device pointer
2066 *
2067 * Enable the compute queues.
2068 * Returns 0 for success, error for failure.
2069 */
2070static int cik_cp_compute_start(struct radeon_device *rdev)
2071{
2072 //todo
2073 return 0;
2074}
2075
2076/**
2077 * cik_cp_compute_fini - stop the compute queues
2078 *
2079 * @rdev: radeon_device pointer
2080 *
2081 * Stop the compute queues and tear down the driver queue
2082 * info.
2083 */
2084static void cik_cp_compute_fini(struct radeon_device *rdev)
2085{
2086 cik_cp_compute_enable(rdev, false);
2087 //todo
2088}
2089
2090/**
2091 * cik_cp_compute_resume - setup the compute queue registers
2092 *
2093 * @rdev: radeon_device pointer
2094 *
2095 * Program the compute queues and test them to make sure they
2096 * are working.
2097 * Returns 0 for success, error for failure.
2098 */
2099static int cik_cp_compute_resume(struct radeon_device *rdev)
2100{
2101 int r;
2102
2103 //todo
2104 r = cik_cp_compute_start(rdev);
2105 if (r)
2106 return r;
2107 return 0;
2108}
2109
2110/* XXX temporary wrappers to handle both compute and gfx */
2111/* XXX */
2112static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2113{
2114 cik_cp_gfx_enable(rdev, enable);
2115 cik_cp_compute_enable(rdev, enable);
2116}
2117
2118/* XXX */
2119static int cik_cp_load_microcode(struct radeon_device *rdev)
2120{
2121 int r;
2122
2123 r = cik_cp_gfx_load_microcode(rdev);
2124 if (r)
2125 return r;
2126 r = cik_cp_compute_load_microcode(rdev);
2127 if (r)
2128 return r;
2129
2130 return 0;
2131}
2132
2133/* XXX */
2134static void cik_cp_fini(struct radeon_device *rdev)
2135{
2136 cik_cp_gfx_fini(rdev);
2137 cik_cp_compute_fini(rdev);
2138}
2139
2140/* XXX */
2141static int cik_cp_resume(struct radeon_device *rdev)
2142{
2143 int r;
2144
2145 /* Reset all cp blocks */
2146 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2147 RREG32(GRBM_SOFT_RESET);
2148 mdelay(15);
2149 WREG32(GRBM_SOFT_RESET, 0);
2150 RREG32(GRBM_SOFT_RESET);
2151
2152 r = cik_cp_load_microcode(rdev);
2153 if (r)
2154 return r;
2155
2156 r = cik_cp_gfx_resume(rdev);
2157 if (r)
2158 return r;
2159 r = cik_cp_compute_resume(rdev);
2160 if (r)
2161 return r;
2162
2163 return 0;
2164}
2165
2166/*
2167 * sDMA - System DMA
2168 * Starting with CIK, the GPU has new asynchronous
2169 * DMA engines. These engines are used for compute
2170 * and gfx. There are two DMA engines (SDMA0, SDMA1)
2171 * and each one supports 1 ring buffer used for gfx
2172 * and 2 queues used for compute.
2173 *
2174 * The programming model is very similar to the CP
2175 * (ring buffer, IBs, etc.), but sDMA has its own
2176 * packet format that is different from the PM4 format
2177 * used by the CP. sDMA supports copying data, writing
2178 * embedded data, solid fills, and a number of other
2179 * things. It also has support for tiling/detiling of
2180 * buffers.
2181 */
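/*
 * For reference, a sketch of the sDMA packet header assumed by the
 * SDMA_PACKET() macro used below (see cikd.h): the opcode sits in bits 7:0,
 * the sub-opcode in bits 15:8 and packet specific flags in bits 31:16,
 * roughly header = (extra << 16) | (sub_op << 8) | op. The DWs that follow
 * the header are packet specific (addresses, counts, inline data).
 */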
2182/**
2183 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2184 *
2185 * @rdev: radeon_device pointer
2186 * @ib: IB object to schedule
2187 *
2188 * Schedule an IB in the DMA ring (CIK).
2189 */
2190void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2191 struct radeon_ib *ib)
2192{
2193 struct radeon_ring *ring = &rdev->ring[ib->ring];
2194 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2195
2196 if (rdev->wb.enabled) {
2197 u32 next_rptr = ring->wptr + 5;
2198 while ((next_rptr & 7) != 4)
2199 next_rptr++;
2200 next_rptr += 4;
2201 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2202 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2203 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2204 radeon_ring_write(ring, 1); /* number of DWs to follow */
2205 radeon_ring_write(ring, next_rptr);
2206 }
2207
2208	/* IB packet must end on an 8 DW boundary */
2209 while ((ring->wptr & 7) != 4)
2210 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2211 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2212 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2213 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2214 radeon_ring_write(ring, ib->length_dw);
2215
2216}
2217
2218/**
2219 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2220 *
2221 * @rdev: radeon_device pointer
2222 * @fence: radeon fence object
2223 *
2224 * Add a DMA fence packet to the ring to write
2225 * the fence seq number and a DMA trap packet to generate
2226 * an interrupt if needed (CIK).
2227 */
2228void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2229 struct radeon_fence *fence)
2230{
2231 struct radeon_ring *ring = &rdev->ring[fence->ring];
2232 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2233 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2234 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2235 u32 ref_and_mask;
2236
2237 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2238 ref_and_mask = SDMA0;
2239 else
2240 ref_and_mask = SDMA1;
2241
2242 /* write the fence */
2243 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2244 radeon_ring_write(ring, addr & 0xffffffff);
2245 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2246 radeon_ring_write(ring, fence->seq);
2247 /* generate an interrupt */
2248 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2249 /* flush HDP */
2250 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2251 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2252 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2253 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2254 radeon_ring_write(ring, ref_and_mask); /* MASK */
2255 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2256}
2257
2258/**
2259 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2260 *
2261 * @rdev: radeon_device pointer
2262 * @ring: radeon_ring structure holding ring information
2263 * @semaphore: radeon semaphore object
2264 * @emit_wait: wait or signal semaphore
2265 *
2266 * Add a DMA semaphore packet to the ring to wait on or signal
2267 * other rings (CIK).
2268 */
2269void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2270 struct radeon_ring *ring,
2271 struct radeon_semaphore *semaphore,
2272 bool emit_wait)
2273{
2274 u64 addr = semaphore->gpu_addr;
2275 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2276
2277 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2278 radeon_ring_write(ring, addr & 0xfffffff8);
2279 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2280}
2281
2282/**
2283 * cik_sdma_gfx_stop - stop the gfx async dma engines
2284 *
2285 * @rdev: radeon_device pointer
2286 *
2287 * Stop the gfx async dma ring buffers (CIK).
2288 */
2289static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2290{
2291 u32 rb_cntl, reg_offset;
2292 int i;
2293
2294 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2295
2296 for (i = 0; i < 2; i++) {
2297 if (i == 0)
2298 reg_offset = SDMA0_REGISTER_OFFSET;
2299 else
2300 reg_offset = SDMA1_REGISTER_OFFSET;
2301 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2302 rb_cntl &= ~SDMA_RB_ENABLE;
2303 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2304 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2305 }
2306}
2307
2308/**
2309 * cik_sdma_rlc_stop - stop the compute async dma engines
2310 *
2311 * @rdev: radeon_device pointer
2312 *
2313 * Stop the compute async dma queues (CIK).
2314 */
2315static void cik_sdma_rlc_stop(struct radeon_device *rdev)
2316{
2317 /* XXX todo */
2318}
2319
2320/**
2321 * cik_sdma_enable - enable/disable the async dma engines
2322 *
2323 * @rdev: radeon_device pointer
2324 * @enable: enable/disable the DMA MEs.
2325 *
2326 * Halt or unhalt the async dma engines (CIK).
2327 */
2328static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2329{
2330 u32 me_cntl, reg_offset;
2331 int i;
2332
2333 for (i = 0; i < 2; i++) {
2334 if (i == 0)
2335 reg_offset = SDMA0_REGISTER_OFFSET;
2336 else
2337 reg_offset = SDMA1_REGISTER_OFFSET;
2338 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2339 if (enable)
2340 me_cntl &= ~SDMA_HALT;
2341 else
2342 me_cntl |= SDMA_HALT;
2343 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2344 }
2345}
2346
2347/**
2348 * cik_sdma_gfx_resume - setup and start the async dma engines
2349 *
2350 * @rdev: radeon_device pointer
2351 *
2352 * Set up the gfx DMA ring buffers and enable them (CIK).
2353 * Returns 0 for success, error for failure.
2354 */
2355static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2356{
2357 struct radeon_ring *ring;
2358 u32 rb_cntl, ib_cntl;
2359 u32 rb_bufsz;
2360 u32 reg_offset, wb_offset;
2361 int i, r;
2362
2363 for (i = 0; i < 2; i++) {
2364 if (i == 0) {
2365 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2366 reg_offset = SDMA0_REGISTER_OFFSET;
2367 wb_offset = R600_WB_DMA_RPTR_OFFSET;
2368 } else {
2369 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2370 reg_offset = SDMA1_REGISTER_OFFSET;
2371 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2372 }
2373
2374 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2375 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2376
2377 /* Set ring buffer size in dwords */
2378 rb_bufsz = drm_order(ring->ring_size / 4);
2379 rb_cntl = rb_bufsz << 1;
2380#ifdef __BIG_ENDIAN
2381 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2382#endif
2383 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2384
2385 /* Initialize the ring buffer's read and write pointers */
2386 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2387 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2388
2389 /* set the wb address whether it's enabled or not */
2390 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2391 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2392 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2393 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2394
2395 if (rdev->wb.enabled)
2396 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2397
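		/* the ring base is programmed in 256-byte units; BASE_HI takes
		 * the upper bits of that shifted address (hence the >> 40).
		 */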
2398 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2399 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2400
2401 ring->wptr = 0;
2402 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2403
2404 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2405
2406 /* enable DMA RB */
2407 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2408
2409 ib_cntl = SDMA_IB_ENABLE;
2410#ifdef __BIG_ENDIAN
2411 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2412#endif
2413 /* enable DMA IBs */
2414 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2415
2416 ring->ready = true;
2417
2418 r = radeon_ring_test(rdev, ring->idx, ring);
2419 if (r) {
2420 ring->ready = false;
2421 return r;
2422 }
2423 }
2424
2425 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2426
2427 return 0;
2428}
2429
2430/**
2431 * cik_sdma_rlc_resume - setup and start the async dma engines
2432 *
2433 * @rdev: radeon_device pointer
2434 *
2435 * Set up the compute DMA queues and enable them (CIK).
2436 * Returns 0 for success, error for failure.
2437 */
2438static int cik_sdma_rlc_resume(struct radeon_device *rdev)
2439{
2440 /* XXX todo */
2441 return 0;
2442}
2443
2444/**
2445 * cik_sdma_load_microcode - load the sDMA ME ucode
2446 *
2447 * @rdev: radeon_device pointer
2448 *
2449 * Loads the sDMA0/1 ucode.
2450 * Returns 0 for success, -EINVAL if the ucode is not available.
2451 */
2452static int cik_sdma_load_microcode(struct radeon_device *rdev)
2453{
2454 const __be32 *fw_data;
2455 int i;
2456
2457 if (!rdev->sdma_fw)
2458 return -EINVAL;
2459
2460 /* stop the gfx rings and rlc compute queues */
2461 cik_sdma_gfx_stop(rdev);
2462 cik_sdma_rlc_stop(rdev);
2463
2464 /* halt the MEs */
2465 cik_sdma_enable(rdev, false);
2466
2467 /* sdma0 */
2468 fw_data = (const __be32 *)rdev->sdma_fw->data;
2469 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2470 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2471 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2472 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2473
2474 /* sdma1 */
2475 fw_data = (const __be32 *)rdev->sdma_fw->data;
2476 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2477 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2478 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2479 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2480
2481 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2482 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2483 return 0;
2484}
2485
2486/**
2487 * cik_sdma_resume - setup and start the async dma engines
2488 *
2489 * @rdev: radeon_device pointer
2490 *
2491 * Set up the DMA engines and enable them (CIK).
2492 * Returns 0 for success, error for failure.
2493 */
2494static int cik_sdma_resume(struct radeon_device *rdev)
2495{
2496 int r;
2497
2498 /* Reset dma */
2499 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
2500 RREG32(SRBM_SOFT_RESET);
2501 udelay(50);
2502 WREG32(SRBM_SOFT_RESET, 0);
2503 RREG32(SRBM_SOFT_RESET);
2504
2505 r = cik_sdma_load_microcode(rdev);
2506 if (r)
2507 return r;
2508
2509 /* unhalt the MEs */
2510 cik_sdma_enable(rdev, true);
2511
2512 /* start the gfx rings and rlc compute queues */
2513 r = cik_sdma_gfx_resume(rdev);
2514 if (r)
2515 return r;
2516 r = cik_sdma_rlc_resume(rdev);
2517 if (r)
2518 return r;
2519
2520 return 0;
2521}
2522
2523/**
2524 * cik_sdma_fini - tear down the async dma engines
2525 *
2526 * @rdev: radeon_device pointer
2527 *
2528 * Stop the async dma engines and free the rings (CIK).
2529 */
2530static void cik_sdma_fini(struct radeon_device *rdev)
2531{
2532 /* stop the gfx rings and rlc compute queues */
2533 cik_sdma_gfx_stop(rdev);
2534 cik_sdma_rlc_stop(rdev);
2535 /* halt the MEs */
2536 cik_sdma_enable(rdev, false);
2537 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2538 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
2539 /* XXX - compute dma queue tear down */
2540}
2541
2542/**
2543 * cik_copy_dma - copy pages using the DMA engine
2544 *
2545 * @rdev: radeon_device pointer
2546 * @src_offset: src GPU address
2547 * @dst_offset: dst GPU address
2548 * @num_gpu_pages: number of GPU pages to xfer
2549 * @fence: radeon fence object
2550 *
2551 * Copy GPU pages using the DMA engine (CIK).
2552 * Used by the radeon ttm implementation to move pages if
2553 * registered as the asic copy callback.
2554 */
2555int cik_copy_dma(struct radeon_device *rdev,
2556 uint64_t src_offset, uint64_t dst_offset,
2557 unsigned num_gpu_pages,
2558 struct radeon_fence **fence)
2559{
2560 struct radeon_semaphore *sem = NULL;
2561 int ring_index = rdev->asic->copy.dma_ring_index;
2562 struct radeon_ring *ring = &rdev->ring[ring_index];
2563 u32 size_in_bytes, cur_size_in_bytes;
2564 int i, num_loops;
2565 int r = 0;
2566
2567 r = radeon_semaphore_create(rdev, &sem);
2568 if (r) {
2569 DRM_ERROR("radeon: moving bo (%d).\n", r);
2570 return r;
2571 }
2572
2573 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
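	/* each COPY_LINEAR packet below moves at most 0x1fffff (~2 MiB) bytes,
	 * so the transfer is split into num_loops chunks of at most that size.
	 */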
2574 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
2575 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
2576 if (r) {
2577 DRM_ERROR("radeon: moving bo (%d).\n", r);
2578 radeon_semaphore_free(rdev, &sem, NULL);
2579 return r;
2580 }
2581
2582 if (radeon_fence_need_sync(*fence, ring->idx)) {
2583 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
2584 ring->idx);
2585 radeon_fence_note_sync(*fence, ring->idx);
2586 } else {
2587 radeon_semaphore_free(rdev, &sem, NULL);
2588 }
2589
2590 for (i = 0; i < num_loops; i++) {
2591 cur_size_in_bytes = size_in_bytes;
2592 if (cur_size_in_bytes > 0x1fffff)
2593 cur_size_in_bytes = 0x1fffff;
2594 size_in_bytes -= cur_size_in_bytes;
2595 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
2596 radeon_ring_write(ring, cur_size_in_bytes);
2597 radeon_ring_write(ring, 0); /* src/dst endian swap */
2598 radeon_ring_write(ring, src_offset & 0xffffffff);
2599 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
2600 radeon_ring_write(ring, dst_offset & 0xfffffffc);
2601 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
2602 src_offset += cur_size_in_bytes;
2603 dst_offset += cur_size_in_bytes;
2604 }
2605
2606 r = radeon_fence_emit(rdev, fence, ring->idx);
2607 if (r) {
2608 radeon_ring_unlock_undo(rdev, ring);
2609 return r;
2610 }
2611
2612 radeon_ring_unlock_commit(rdev, ring);
2613 radeon_semaphore_free(rdev, &sem, *fence);
2614
2615 return r;
2616}
2617
2618/**
2619 * cik_sdma_ring_test - simple async dma engine test
2620 *
2621 * @rdev: radeon_device pointer
2622 * @ring: radeon_ring structure holding ring information
2623 *
2624 * Test the DMA engine by using it to write a value
2625 * to memory (CIK).
2626 * Returns 0 for success, error for failure.
2627 */
2628int cik_sdma_ring_test(struct radeon_device *rdev,
2629 struct radeon_ring *ring)
2630{
2631 unsigned i;
2632 int r;
2633 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2634 u32 tmp;
2635
2636 if (!ptr) {
2637 DRM_ERROR("invalid vram scratch pointer\n");
2638 return -EINVAL;
2639 }
2640
2641 tmp = 0xCAFEDEAD;
2642 writel(tmp, ptr);
2643
2644 r = radeon_ring_lock(rdev, ring, 4);
2645 if (r) {
2646 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2647 return r;
2648 }
2649 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2650 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2651 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
2652 radeon_ring_write(ring, 1); /* number of DWs to follow */
2653 radeon_ring_write(ring, 0xDEADBEEF);
2654 radeon_ring_unlock_commit(rdev, ring);
2655
2656 for (i = 0; i < rdev->usec_timeout; i++) {
2657 tmp = readl(ptr);
2658 if (tmp == 0xDEADBEEF)
2659 break;
2660 DRM_UDELAY(1);
2661 }
2662
2663 if (i < rdev->usec_timeout) {
2664 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2665 } else {
2666 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2667 ring->idx, tmp);
2668 r = -EINVAL;
2669 }
2670 return r;
2671}
2672
2673/**
2674 * cik_sdma_ib_test - test an IB on the DMA engine
2675 *
2676 * @rdev: radeon_device pointer
2677 * @ring: radeon_ring structure holding ring information
2678 *
2679 * Test a simple IB in the DMA ring (CIK).
2680 * Returns 0 on success, error on failure.
2681 */
2682int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2683{
2684 struct radeon_ib ib;
2685 unsigned i;
2686 int r;
2687 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2688 u32 tmp = 0;
2689
2690 if (!ptr) {
2691 DRM_ERROR("invalid vram scratch pointer\n");
2692 return -EINVAL;
2693 }
2694
2695 tmp = 0xCAFEDEAD;
2696 writel(tmp, ptr);
2697
2698 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2699 if (r) {
2700 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2701 return r;
2702 }
2703
2704 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2705 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
2706 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
2707 ib.ptr[3] = 1;
2708 ib.ptr[4] = 0xDEADBEEF;
2709 ib.length_dw = 5;
2710
2711 r = radeon_ib_schedule(rdev, &ib, NULL);
2712 if (r) {
2713 radeon_ib_free(rdev, &ib);
2714 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2715 return r;
2716 }
2717 r = radeon_fence_wait(ib.fence, false);
2718 if (r) {
2719 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2720 return r;
2721 }
2722 for (i = 0; i < rdev->usec_timeout; i++) {
2723 tmp = readl(ptr);
2724 if (tmp == 0xDEADBEEF)
2725 break;
2726 DRM_UDELAY(1);
2727 }
2728 if (i < rdev->usec_timeout) {
2729 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2730 } else {
2731 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
2732 r = -EINVAL;
2733 }
2734 radeon_ib_free(rdev, &ib);
2735 return r;
2736}
2737
2738
2739static void cik_print_gpu_status_regs(struct radeon_device *rdev)
2740{
2741 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2742 RREG32(GRBM_STATUS));
2743 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2744 RREG32(GRBM_STATUS2));
2745 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2746 RREG32(GRBM_STATUS_SE0));
2747 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2748 RREG32(GRBM_STATUS_SE1));
2749 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2750 RREG32(GRBM_STATUS_SE2));
2751 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2752 RREG32(GRBM_STATUS_SE3));
2753 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2754 RREG32(SRBM_STATUS));
2755 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2756 RREG32(SRBM_STATUS2));
2757 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
2758 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
2759 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
2760 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
2761}
2762
2763/**
2764 * cik_gpu_check_soft_reset - check which blocks are busy
2765 *
2766 * @rdev: radeon_device pointer
2767 *
2768 * Check which blocks are busy and return the relevant reset
2769 * mask to be used by cik_gpu_soft_reset().
2770 * Returns a mask of the blocks to be reset.
2771 */
2772static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
2773{
2774 u32 reset_mask = 0;
2775 u32 tmp;
2776
2777 /* GRBM_STATUS */
2778 tmp = RREG32(GRBM_STATUS);
2779 if (tmp & (PA_BUSY | SC_BUSY |
2780 BCI_BUSY | SX_BUSY |
2781 TA_BUSY | VGT_BUSY |
2782 DB_BUSY | CB_BUSY |
2783 GDS_BUSY | SPI_BUSY |
2784 IA_BUSY | IA_BUSY_NO_DMA))
2785 reset_mask |= RADEON_RESET_GFX;
2786
2787 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
2788 reset_mask |= RADEON_RESET_CP;
2789
2790 /* GRBM_STATUS2 */
2791 tmp = RREG32(GRBM_STATUS2);
2792 if (tmp & RLC_BUSY)
2793 reset_mask |= RADEON_RESET_RLC;
2794
2795 /* SDMA0_STATUS_REG */
2796 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
2797 if (!(tmp & SDMA_IDLE))
2798 reset_mask |= RADEON_RESET_DMA;
2799
2800 /* SDMA1_STATUS_REG */
2801 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
2802 if (!(tmp & SDMA_IDLE))
2803 reset_mask |= RADEON_RESET_DMA1;
2804
2805 /* SRBM_STATUS2 */
2806 tmp = RREG32(SRBM_STATUS2);
2807 if (tmp & SDMA_BUSY)
2808 reset_mask |= RADEON_RESET_DMA;
2809
2810 if (tmp & SDMA1_BUSY)
2811 reset_mask |= RADEON_RESET_DMA1;
2812
2813 /* SRBM_STATUS */
2814 tmp = RREG32(SRBM_STATUS);
2815
2816 if (tmp & IH_BUSY)
2817 reset_mask |= RADEON_RESET_IH;
2818
2819 if (tmp & SEM_BUSY)
2820 reset_mask |= RADEON_RESET_SEM;
2821
2822 if (tmp & GRBM_RQ_PENDING)
2823 reset_mask |= RADEON_RESET_GRBM;
2824
2825 if (tmp & VMC_BUSY)
2826 reset_mask |= RADEON_RESET_VMC;
2827
2828 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
2829 MCC_BUSY | MCD_BUSY))
2830 reset_mask |= RADEON_RESET_MC;
2831
2832 if (evergreen_is_display_hung(rdev))
2833 reset_mask |= RADEON_RESET_DISPLAY;
2834
2835	/* Skip MC reset as it's most likely not hung, just busy */
2836 if (reset_mask & RADEON_RESET_MC) {
2837 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
2838 reset_mask &= ~RADEON_RESET_MC;
2839 }
2840
2841 return reset_mask;
2842}
2843
2844/**
2845 * cik_gpu_soft_reset - soft reset GPU
2846 *
2847 * @rdev: radeon_device pointer
2848 * @reset_mask: mask of which blocks to reset
2849 *
2850 * Soft reset the blocks specified in @reset_mask.
2851 */
2852static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
2853{
2854 struct evergreen_mc_save save;
2855 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
2856 u32 tmp;
2857
2858 if (reset_mask == 0)
2859 return;
2860
2861 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
2862
2863 cik_print_gpu_status_regs(rdev);
2864 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
2865 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
2866 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
2867 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
2868
2869 /* stop the rlc */
2870 cik_rlc_stop(rdev);
2871
2872 /* Disable GFX parsing/prefetching */
2873 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2874
2875 /* Disable MEC parsing/prefetching */
2876 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2877
2878 if (reset_mask & RADEON_RESET_DMA) {
2879 /* sdma0 */
2880 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
2881 tmp |= SDMA_HALT;
2882 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
2883 }
2884 if (reset_mask & RADEON_RESET_DMA1) {
2885 /* sdma1 */
2886 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
2887 tmp |= SDMA_HALT;
2888 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
2889 }
2890
2891 evergreen_mc_stop(rdev, &save);
2892 if (evergreen_mc_wait_for_idle(rdev)) {
2893 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2894 }
2895
2896 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
2897 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
2898
2899 if (reset_mask & RADEON_RESET_CP) {
2900 grbm_soft_reset |= SOFT_RESET_CP;
2901
2902 srbm_soft_reset |= SOFT_RESET_GRBM;
2903 }
2904
2905 if (reset_mask & RADEON_RESET_DMA)
2906 srbm_soft_reset |= SOFT_RESET_SDMA;
2907
2908 if (reset_mask & RADEON_RESET_DMA1)
2909 srbm_soft_reset |= SOFT_RESET_SDMA1;
2910
2911 if (reset_mask & RADEON_RESET_DISPLAY)
2912 srbm_soft_reset |= SOFT_RESET_DC;
2913
2914 if (reset_mask & RADEON_RESET_RLC)
2915 grbm_soft_reset |= SOFT_RESET_RLC;
2916
2917 if (reset_mask & RADEON_RESET_SEM)
2918 srbm_soft_reset |= SOFT_RESET_SEM;
2919
2920 if (reset_mask & RADEON_RESET_IH)
2921 srbm_soft_reset |= SOFT_RESET_IH;
2922
2923 if (reset_mask & RADEON_RESET_GRBM)
2924 srbm_soft_reset |= SOFT_RESET_GRBM;
2925
2926 if (reset_mask & RADEON_RESET_VMC)
2927 srbm_soft_reset |= SOFT_RESET_VMC;
2928
2929 if (!(rdev->flags & RADEON_IS_IGP)) {
2930 if (reset_mask & RADEON_RESET_MC)
2931 srbm_soft_reset |= SOFT_RESET_MC;
2932 }
2933
2934 if (grbm_soft_reset) {
2935 tmp = RREG32(GRBM_SOFT_RESET);
2936 tmp |= grbm_soft_reset;
2937 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
2938 WREG32(GRBM_SOFT_RESET, tmp);
2939 tmp = RREG32(GRBM_SOFT_RESET);
2940
2941 udelay(50);
2942
2943 tmp &= ~grbm_soft_reset;
2944 WREG32(GRBM_SOFT_RESET, tmp);
2945 tmp = RREG32(GRBM_SOFT_RESET);
2946 }
2947
2948 if (srbm_soft_reset) {
2949 tmp = RREG32(SRBM_SOFT_RESET);
2950 tmp |= srbm_soft_reset;
2951 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
2952 WREG32(SRBM_SOFT_RESET, tmp);
2953 tmp = RREG32(SRBM_SOFT_RESET);
2954
2955 udelay(50);
2956
2957 tmp &= ~srbm_soft_reset;
2958 WREG32(SRBM_SOFT_RESET, tmp);
2959 tmp = RREG32(SRBM_SOFT_RESET);
2960 }
2961
2962 /* Wait a little for things to settle down */
2963 udelay(50);
2964
2965 evergreen_mc_resume(rdev, &save);
2966 udelay(50);
2967
2968 cik_print_gpu_status_regs(rdev);
2969}
2970
2971/**
2972 * cik_asic_reset - soft reset GPU
2973 *
2974 * @rdev: radeon_device pointer
2975 *
2976 * Look up which blocks are hung and attempt
2977 * to reset them.
2978 * Returns 0 for success.
2979 */
2980int cik_asic_reset(struct radeon_device *rdev)
2981{
2982 u32 reset_mask;
2983
2984 reset_mask = cik_gpu_check_soft_reset(rdev);
2985
2986 if (reset_mask)
2987 r600_set_bios_scratch_engine_hung(rdev, true);
2988
2989 cik_gpu_soft_reset(rdev, reset_mask);
2990
2991 reset_mask = cik_gpu_check_soft_reset(rdev);
2992
2993 if (!reset_mask)
2994 r600_set_bios_scratch_engine_hung(rdev, false);
2995
2996 return 0;
2997}
2998
2999/**
3000 * cik_gfx_is_lockup - check if the 3D engine is locked up
3001 *
3002 * @rdev: radeon_device pointer
3003 * @ring: radeon_ring structure holding ring information
3004 *
3005 * Check if the 3D engine is locked up (CIK).
3006 * Returns true if the engine is locked, false if not.
3007 */
3008bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3009{
3010	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3011
3012	if (!(reset_mask & (RADEON_RESET_GFX |
3013			    RADEON_RESET_COMPUTE |
3014			    RADEON_RESET_CP))) {
3015		radeon_ring_lockup_update(ring);
3016 return false;
3017 }
3018 /* force CP activities */
3019 radeon_ring_force_activity(rdev, ring);
3020 return radeon_ring_test_lockup(rdev, ring);
3021}
3022
3023/**
3024 * cik_sdma_is_lockup - Check if the DMA engine is locked up
3025 *
3026 * @rdev: radeon_device pointer
3027 * @ring: radeon_ring structure holding ring information
3028 *
3029 * Check if the async DMA engine is locked up (CIK).
3030 * Returns true if the engine appears to be locked up, false if not.
3031 */
3032bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3033{
3034	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
3035	u32 mask;
3036
3037	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3038		mask = RADEON_RESET_DMA;
3039	else
3040		mask = RADEON_RESET_DMA1;
3041
3042	if (!(reset_mask & mask)) {
3043		radeon_ring_lockup_update(ring);
3044 return false;
3045 }
3046 /* force ring activities */
3047 radeon_ring_force_activity(rdev, ring);
3048 return radeon_ring_test_lockup(rdev, ring);
3049}
3050
3051/* MC */
3052/**
3053 * cik_mc_program - program the GPU memory controller
3054 *
3055 * @rdev: radeon_device pointer
3056 *
3057 * Set the location of vram, gart, and AGP in the GPU's
3058 * physical address space (CIK).
3059 */
3060static void cik_mc_program(struct radeon_device *rdev)
3061{
3062 struct evergreen_mc_save save;
3063 u32 tmp;
3064 int i, j;
3065
3066 /* Initialize HDP */
3067 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3068 WREG32((0x2c14 + j), 0x00000000);
3069 WREG32((0x2c18 + j), 0x00000000);
3070 WREG32((0x2c1c + j), 0x00000000);
3071 WREG32((0x2c20 + j), 0x00000000);
3072 WREG32((0x2c24 + j), 0x00000000);
3073 }
3074 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3075
3076 evergreen_mc_stop(rdev, &save);
3077 if (radeon_mc_wait_for_idle(rdev)) {
3078 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3079 }
3080 /* Lockout access through VGA aperture*/
3081 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3082 /* Update configuration */
3083 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3084 rdev->mc.vram_start >> 12);
3085 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3086 rdev->mc.vram_end >> 12);
3087 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3088 rdev->vram_scratch.gpu_addr >> 12);
3089 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3090 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3091 WREG32(MC_VM_FB_LOCATION, tmp);
3092 /* XXX double check these! */
3093 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3094 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3095 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3096 WREG32(MC_VM_AGP_BASE, 0);
3097 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3098 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3099 if (radeon_mc_wait_for_idle(rdev)) {
3100 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3101 }
3102 evergreen_mc_resume(rdev, &save);
3103 /* we need to own VRAM, so turn off the VGA renderer here
3104 * to stop it overwriting our objects */
3105 rv515_vga_render_disable(rdev);
3106}
3107
3108/**
3109 * cik_mc_init - initialize the memory controller driver params
3110 *
3111 * @rdev: radeon_device pointer
3112 *
3113 * Look up the amount of vram, vram width, and decide how to place
3114 * vram and gart within the GPU's physical address space (CIK).
3115 * Returns 0 for success.
3116 */
3117static int cik_mc_init(struct radeon_device *rdev)
3118{
3119 u32 tmp;
3120 int chansize, numchan;
3121
3122	/* Get VRAM information */
3123 rdev->mc.vram_is_ddr = true;
3124 tmp = RREG32(MC_ARB_RAMCFG);
3125 if (tmp & CHANSIZE_MASK) {
3126 chansize = 64;
3127 } else {
3128 chansize = 32;
3129 }
3130 tmp = RREG32(MC_SHARED_CHMAP);
3131 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3132 case 0:
3133 default:
3134 numchan = 1;
3135 break;
3136 case 1:
3137 numchan = 2;
3138 break;
3139 case 2:
3140 numchan = 4;
3141 break;
3142 case 3:
3143 numchan = 8;
3144 break;
3145 case 4:
3146 numchan = 3;
3147 break;
3148 case 5:
3149 numchan = 6;
3150 break;
3151 case 6:
3152 numchan = 10;
3153 break;
3154 case 7:
3155 numchan = 12;
3156 break;
3157 case 8:
3158 numchan = 16;
3159 break;
3160 }
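	/* e.g. 4 channels of 64 bits give a 256-bit vram interface */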
3161 rdev->mc.vram_width = numchan * chansize;
3162 /* Could aper size report 0 ? */
3163 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3164 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3165	/* size in MB */
3166 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3167 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3168 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3169 si_vram_gtt_location(rdev, &rdev->mc);
3170 radeon_update_bandwidth_info(rdev);
3171
3172 return 0;
3173}
3174
3175/*
3176 * GART
3177 * VMID 0 is the physical GPU addresses as used by the kernel.
3178 * VMIDs 1-15 are used for userspace clients and are handled
3179 * by the radeon vm/hsa code.
3180 */
3181/**
3182 * cik_pcie_gart_tlb_flush - gart tlb flush callback
3183 *
3184 * @rdev: radeon_device pointer
3185 *
3186 * Flush the TLB for the VMID 0 page table (CIK).
3187 */
3188void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3189{
3190 /* flush hdp cache */
3191 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3192
3193 /* bits 0-15 are the VM contexts0-15 */
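	/* writing 0x1 invalidates only VM context 0, which holds the GART
	 * mapping set up here.
	 */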
3194 WREG32(VM_INVALIDATE_REQUEST, 0x1);
3195}
3196
3197/**
3198 * cik_pcie_gart_enable - gart enable
3199 *
3200 * @rdev: radeon_device pointer
3201 *
3202 * This sets up the TLBs, programs the page tables for VMID0,
3203 * sets up the hw for VMIDs 1-15 which are allocated on
3204 * demand, and sets up the global locations for the LDS, GDS,
3205 * and GPUVM for FSA64 clients (CIK).
3206 * Returns 0 for success, errors for failure.
3207 */
3208static int cik_pcie_gart_enable(struct radeon_device *rdev)
3209{
3210 int r, i;
3211
3212 if (rdev->gart.robj == NULL) {
3213 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3214 return -EINVAL;
3215 }
3216 r = radeon_gart_table_vram_pin(rdev);
3217 if (r)
3218 return r;
3219 radeon_gart_restore(rdev);
3220 /* Setup TLB control */
3221 WREG32(MC_VM_MX_L1_TLB_CNTL,
3222 (0xA << 7) |
3223 ENABLE_L1_TLB |
3224 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3225 ENABLE_ADVANCED_DRIVER_MODEL |
3226 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3227 /* Setup L2 cache */
3228 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3229 ENABLE_L2_FRAGMENT_PROCESSING |
3230 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3231 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3232 EFFECTIVE_L2_QUEUE_SIZE(7) |
3233 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3234 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3235 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3236 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3237 /* setup context0 */
3238 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3239 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3240 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3241 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3242 (u32)(rdev->dummy_page.addr >> 12));
3243 WREG32(VM_CONTEXT0_CNTL2, 0);
3244 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3245 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3246
3247 WREG32(0x15D4, 0);
3248 WREG32(0x15D8, 0);
3249 WREG32(0x15DC, 0);
3250
3251 /* empty context1-15 */
3252 /* FIXME start with 4G, once using 2 level pt switch to full
3253 * vm size space
3254 */
3255 /* set vm size, must be a multiple of 4 */
3256 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3257 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3258 for (i = 1; i < 16; i++) {
3259 if (i < 8)
3260 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3261 rdev->gart.table_addr >> 12);
3262 else
3263 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3264 rdev->gart.table_addr >> 12);
3265 }
3266
3267 /* enable context1-15 */
3268 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3269 (u32)(rdev->dummy_page.addr >> 12));
3270	WREG32(VM_CONTEXT1_CNTL2, 4);
3271	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3272				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3273 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3274 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3275 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3276 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3277 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3278 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3279 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3280 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3281 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3282 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3283 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3284
3285 /* TC cache setup ??? */
3286 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3287 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3288 WREG32(TC_CFG_L1_STORE_POLICY, 0);
3289
3290 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3291 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3292 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3293 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3294 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3295
3296 WREG32(TC_CFG_L1_VOLATILE, 0);
3297 WREG32(TC_CFG_L2_VOLATILE, 0);
3298
3299 if (rdev->family == CHIP_KAVERI) {
3300 u32 tmp = RREG32(CHUB_CONTROL);
3301 tmp &= ~BYPASS_VM;
3302 WREG32(CHUB_CONTROL, tmp);
3303 }
3304
3305 /* XXX SH_MEM regs */
3306 /* where to put LDS, scratch, GPUVM in FSA64 space */
3307 for (i = 0; i < 16; i++) {
3308 WREG32(SRBM_GFX_CNTL, VMID(i));
3309		/* CP and shaders */
3310		WREG32(SH_MEM_CONFIG, 0);
3311 WREG32(SH_MEM_APE1_BASE, 1);
3312 WREG32(SH_MEM_APE1_LIMIT, 0);
3313 WREG32(SH_MEM_BASES, 0);
3314		/* SDMA GFX */
3315 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3316 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3317 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3318 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3319 /* XXX SDMA RLC - todo */
3320	}
3321 WREG32(SRBM_GFX_CNTL, 0);
3322
3323 cik_pcie_gart_tlb_flush(rdev);
3324 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3325 (unsigned)(rdev->mc.gtt_size >> 20),
3326 (unsigned long long)rdev->gart.table_addr);
3327 rdev->gart.ready = true;
3328 return 0;
3329}
3330
3331/**
3332 * cik_pcie_gart_disable - gart disable
3333 *
3334 * @rdev: radeon_device pointer
3335 *
3336 * This disables all VM page tables (CIK).
3337 */
3338static void cik_pcie_gart_disable(struct radeon_device *rdev)
3339{
3340 /* Disable all tables */
3341 WREG32(VM_CONTEXT0_CNTL, 0);
3342 WREG32(VM_CONTEXT1_CNTL, 0);
3343 /* Setup TLB control */
3344 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3345 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3346 /* Setup L2 cache */
3347 WREG32(VM_L2_CNTL,
3348 ENABLE_L2_FRAGMENT_PROCESSING |
3349 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3350 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3351 EFFECTIVE_L2_QUEUE_SIZE(7) |
3352 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3353 WREG32(VM_L2_CNTL2, 0);
3354 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3355 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3356 radeon_gart_table_vram_unpin(rdev);
3357}
3358
3359/**
3360 * cik_pcie_gart_fini - vm fini callback
3361 *
3362 * @rdev: radeon_device pointer
3363 *
3364 * Tears down the driver GART/VM setup (CIK).
3365 */
3366static void cik_pcie_gart_fini(struct radeon_device *rdev)
3367{
3368 cik_pcie_gart_disable(rdev);
3369 radeon_gart_table_vram_free(rdev);
3370 radeon_gart_fini(rdev);
3371}
3372
3373/* vm parser */
3374/**
3375 * cik_ib_parse - vm ib_parse callback
3376 *
3377 * @rdev: radeon_device pointer
3378 * @ib: indirect buffer pointer
3379 *
3380 * CIK uses hw IB checking so this is a nop (CIK).
3381 */
3382int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3383{
3384 return 0;
3385}
3386
3387/*
3388 * vm
3389 * VMID 0 is the physical GPU addresses as used by the kernel.
3390 * VMIDs 1-15 are used for userspace clients and are handled
3391 * by the radeon vm/hsa code.
3392 */
3393/**
3394 * cik_vm_init - cik vm init callback
3395 *
3396 * @rdev: radeon_device pointer
3397 *
3398 * Inits cik specific vm parameters (number of VMs, base of vram for
3399 * VMIDs 1-15) (CIK).
3400 * Returns 0 for success.
3401 */
3402int cik_vm_init(struct radeon_device *rdev)
3403{
3404 /* number of VMs */
3405 rdev->vm_manager.nvm = 16;
3406 /* base offset of vram pages */
3407 if (rdev->flags & RADEON_IS_IGP) {
3408 u64 tmp = RREG32(MC_VM_FB_OFFSET);
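		/* the FB offset register is assumed to be in 4 MB granularity,
		 * hence the shift by 22 to get a byte address.
		 */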
3409 tmp <<= 22;
3410 rdev->vm_manager.vram_base_offset = tmp;
3411 } else
3412 rdev->vm_manager.vram_base_offset = 0;
3413
3414 return 0;
3415}
3416
3417/**
3418 * cik_vm_fini - cik vm fini callback
3419 *
3420 * @rdev: radeon_device pointer
3421 *
3422 * Tear down any asic specific VM setup (CIK).
3423 */
3424void cik_vm_fini(struct radeon_device *rdev)
3425{
3426}
3427
3428/**
3429 * cik_vm_flush - cik vm flush using the CP
3430 *
3431 * @rdev: radeon_device pointer
3432 *
3433 * Update the page table base and flush the VM TLB
3434 * using the CP (CIK).
3435 */
3436void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3437{
3438 struct radeon_ring *ring = &rdev->ring[ridx];
3439
3440 if (vm == NULL)
3441 return;
3442
3443 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3444 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3445 WRITE_DATA_DST_SEL(0)));
3446 if (vm->id < 8) {
3447 radeon_ring_write(ring,
3448 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3449 } else {
3450 radeon_ring_write(ring,
3451 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3452 }
3453 radeon_ring_write(ring, 0);
3454 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3455
3456 /* update SH_MEM_* regs */
3457 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3458 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3459 WRITE_DATA_DST_SEL(0)));
3460 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3461 radeon_ring_write(ring, 0);
3462 radeon_ring_write(ring, VMID(vm->id));
3463
3464 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
3465 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3466 WRITE_DATA_DST_SEL(0)));
3467 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3468 radeon_ring_write(ring, 0);
3469
3470 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
3471 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
3472 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
3473 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
3474
3475 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3476 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3477 WRITE_DATA_DST_SEL(0)));
3478 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3479 radeon_ring_write(ring, 0);
3480 radeon_ring_write(ring, VMID(0));
3481
3482 /* HDP flush */
3483 /* We should be using the WAIT_REG_MEM packet here like in
3484 * cik_fence_ring_emit(), but it causes the CP to hang in this
3485 * context...
3486 */
3487 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3488 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3489 WRITE_DATA_DST_SEL(0)));
3490 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3491 radeon_ring_write(ring, 0);
3492 radeon_ring_write(ring, 0);
3493
3494 /* bits 0-15 are the VM contexts0-15 */
3495 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3496 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3497 WRITE_DATA_DST_SEL(0)));
3498 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3499 radeon_ring_write(ring, 0);
3500 radeon_ring_write(ring, 1 << vm->id);
3501
3502 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3503 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3504 radeon_ring_write(ring, 0x0);
3505}
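
/* Illustrative sketch, not part of the driver: both cik_vm_flush() and
 * cik_dma_vm_flush() pick the page directory base register the same way,
 * splitting at VMID 8.  The register names match the defines used above.
 */
static inline u32 cik_vm_pd_base_reg(unsigned int vm_id)
{
	if (vm_id < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2);
	else
		return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2);
}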
3506
Alex Deucher605de6b2012-10-22 13:04:03 -04003507/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04003508 * cik_vm_set_page - update the page tables using CP or sDMA
3509 *
3510 * @rdev: radeon_device pointer
3511 * @ib: indirect buffer to fill with commands
3512 * @pe: addr of the page entry
3513 * @addr: dst addr to write into pe
3514 * @count: number of page entries to update
3515 * @incr: increase next addr by incr bytes
3516 * @flags: access flags
3517 *
3518 * Update the page tables using CP or sDMA (CIK).
3519 */
3520void cik_vm_set_page(struct radeon_device *rdev,
3521 struct radeon_ib *ib,
3522 uint64_t pe,
3523 uint64_t addr, unsigned count,
3524 uint32_t incr, uint32_t flags)
3525{
3526 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3527 uint64_t value;
3528 unsigned ndw;
3529
3530 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3531 /* CP */
3532 while (count) {
3533 ndw = 2 + count * 2;
3534 if (ndw > 0x3FFE)
3535 ndw = 0x3FFE;
3536
3537 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3538 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3539 WRITE_DATA_DST_SEL(1));
3540 ib->ptr[ib->length_dw++] = pe;
3541 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3542 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3543 if (flags & RADEON_VM_PAGE_SYSTEM) {
3544 value = radeon_vm_map_gart(rdev, addr);
3545 value &= 0xFFFFFFFFFFFFF000ULL;
3546 } else if (flags & RADEON_VM_PAGE_VALID) {
3547 value = addr;
3548 } else {
3549 value = 0;
3550 }
3551 addr += incr;
3552 value |= r600_flags;
3553 ib->ptr[ib->length_dw++] = value;
3554 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3555 }
3556 }
3557 } else {
3558 /* DMA */
3559 if (flags & RADEON_VM_PAGE_SYSTEM) {
3560 while (count) {
3561 ndw = count * 2;
3562 if (ndw > 0xFFFFE)
3563 ndw = 0xFFFFE;
3564
3565 /* for non-physically contiguous pages (system) */
3566 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3567 ib->ptr[ib->length_dw++] = pe;
3568 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3569 ib->ptr[ib->length_dw++] = ndw;
3570 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3571 if (flags & RADEON_VM_PAGE_SYSTEM) {
3572 value = radeon_vm_map_gart(rdev, addr);
3573 value &= 0xFFFFFFFFFFFFF000ULL;
3574 } else if (flags & RADEON_VM_PAGE_VALID) {
3575 value = addr;
3576 } else {
3577 value = 0;
3578 }
3579 addr += incr;
3580 value |= r600_flags;
3581 ib->ptr[ib->length_dw++] = value;
3582 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3583 }
3584 }
3585 } else {
3586 while (count) {
3587 ndw = count;
3588 if (ndw > 0x7FFFF)
3589 ndw = 0x7FFFF;
3590
3591 if (flags & RADEON_VM_PAGE_VALID)
3592 value = addr;
3593 else
3594 value = 0;
3595 /* for physically contiguous pages (vram) */
3596 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
3597 ib->ptr[ib->length_dw++] = pe; /* dst addr */
3598 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3599 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3600 ib->ptr[ib->length_dw++] = 0;
3601 ib->ptr[ib->length_dw++] = value; /* value */
3602 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3603 ib->ptr[ib->length_dw++] = incr; /* increment size */
3604 ib->ptr[ib->length_dw++] = 0;
3605 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
3606 pe += ndw * 8;
3607 addr += ndw * incr;
3608 count -= ndw;
3609 }
3610 }
3611 while (ib->length_dw & 0x7)
3612 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
3613 }
3614}
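
/* Illustrative sketch, not part of the driver: on the CP path above, one
 * WRITE_DATA packet carries a control dword, a 64-bit destination address
 * and then a 64-bit PTE per entry; this helper mirrors the "ndw" bound and
 * clamp used in that loop (the PACKET3 header itself is not counted).
 */
static inline unsigned cik_vm_cp_write_ndw(unsigned count)
{
	unsigned ndw = 2 + count * 2;

	return (ndw > 0x3FFE) ? 0x3FFE : ndw;
}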
3615
3616/**
Alex Deucher605de6b2012-10-22 13:04:03 -04003617 * cik_dma_vm_flush - cik vm flush using sDMA
3618 *
3619 * @rdev: radeon_device pointer
3620 *
3621 * Update the page table base and flush the VM TLB
3622 * using sDMA (CIK).
3623 */
3624void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3625{
3626 struct radeon_ring *ring = &rdev->ring[ridx];
3627 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3628 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3629 u32 ref_and_mask;
3630
3631 if (vm == NULL)
3632 return;
3633
3634 if (ridx == R600_RING_TYPE_DMA_INDEX)
3635 ref_and_mask = SDMA0;
3636 else
3637 ref_and_mask = SDMA1;
3638
3639 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3640 if (vm->id < 8) {
3641 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3642 } else {
3643 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3644 }
3645 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3646
3647 /* update SH_MEM_* regs */
3648 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3649 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3650 radeon_ring_write(ring, VMID(vm->id));
3651
3652 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3653 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3654 radeon_ring_write(ring, 0);
3655
3656 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3657 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
3658 radeon_ring_write(ring, 0);
3659
3660 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3661 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
3662 radeon_ring_write(ring, 1);
3663
3664 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3665 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
3666 radeon_ring_write(ring, 0);
3667
3668 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3669 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3670 radeon_ring_write(ring, VMID(0));
3671
3672 /* flush HDP */
3673 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3674 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3675 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3676 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3677 radeon_ring_write(ring, ref_and_mask); /* MASK */
3678 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3679
3680 /* flush TLB */
3681 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3682 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3683 radeon_ring_write(ring, 1 << vm->id);
3684}
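
/* Illustrative sketch, not part of the driver: every register update in
 * cik_dma_vm_flush() is the same three dword SRBM_WRITE packet; the 0xf000
 * in the header presumably enables all four byte lanes of the destination
 * register.
 */
static inline void cik_sdma_srbm_write(struct radeon_ring *ring, u32 reg, u32 val)
{
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, reg >> 2);
	radeon_ring_write(ring, val);
}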
3685
Alex Deucherf6796ca2012-11-09 10:44:08 -05003686/*
3687 * RLC
3688 * The RLC is a multi-purpose microengine that handles a
3689 * variety of functions, the most important of which is
3690 * the interrupt controller.
3691 */
3692/**
3693 * cik_rlc_stop - stop the RLC ME
3694 *
3695 * @rdev: radeon_device pointer
3696 *
3697 * Halt the RLC ME (MicroEngine) (CIK).
3698 */
3699static void cik_rlc_stop(struct radeon_device *rdev)
3700{
3701 int i, j, k;
3702 u32 mask, tmp;
3703
3704 tmp = RREG32(CP_INT_CNTL_RING0);
3705 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3706 WREG32(CP_INT_CNTL_RING0, tmp);
3707
3708 RREG32(CB_CGTT_SCLK_CTRL);
3709 RREG32(CB_CGTT_SCLK_CTRL);
3710 RREG32(CB_CGTT_SCLK_CTRL);
3711 RREG32(CB_CGTT_SCLK_CTRL);
3712
3713 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3714 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
3715
3716 WREG32(RLC_CNTL, 0);
3717
3718 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3719 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3720 cik_select_se_sh(rdev, i, j);
3721 for (k = 0; k < rdev->usec_timeout; k++) {
3722 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
3723 break;
3724 udelay(1);
3725 }
3726 }
3727 }
3728 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3729
3730 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
3731 for (k = 0; k < rdev->usec_timeout; k++) {
3732 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3733 break;
3734 udelay(1);
3735 }
3736}
3737
3738/**
3739 * cik_rlc_start - start the RLC ME
3740 *
3741 * @rdev: radeon_device pointer
3742 *
3743 * Unhalt the RLC ME (MicroEngine) (CIK).
3744 */
3745static void cik_rlc_start(struct radeon_device *rdev)
3746{
3747 u32 tmp;
3748
3749 WREG32(RLC_CNTL, RLC_ENABLE);
3750
3751 tmp = RREG32(CP_INT_CNTL_RING0);
3752 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3753 WREG32(CP_INT_CNTL_RING0, tmp);
3754
3755 udelay(50);
3756}
3757
3758/**
3759 * cik_rlc_resume - setup the RLC hw
3760 *
3761 * @rdev: radeon_device pointer
3762 *
3763 * Initialize the RLC registers, load the ucode,
3764 * and start the RLC (CIK).
3765 * Returns 0 for success, -EINVAL if the ucode is not available.
3766 */
3767static int cik_rlc_resume(struct radeon_device *rdev)
3768{
3769 u32 i, size;
3770 u32 clear_state_info[3];
3771 const __be32 *fw_data;
3772
3773 if (!rdev->rlc_fw)
3774 return -EINVAL;
3775
3776 switch (rdev->family) {
3777 case CHIP_BONAIRE:
3778 default:
3779 size = BONAIRE_RLC_UCODE_SIZE;
3780 break;
3781 case CHIP_KAVERI:
3782 size = KV_RLC_UCODE_SIZE;
3783 break;
3784 case CHIP_KABINI:
3785 size = KB_RLC_UCODE_SIZE;
3786 break;
3787 }
3788
3789 cik_rlc_stop(rdev);
3790
3791 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
3792 RREG32(GRBM_SOFT_RESET);
3793 udelay(50);
3794 WREG32(GRBM_SOFT_RESET, 0);
3795 RREG32(GRBM_SOFT_RESET);
3796 udelay(50);
3797
3798 WREG32(RLC_LB_CNTR_INIT, 0);
3799 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
3800
3801 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3802 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
3803 WREG32(RLC_LB_PARAMS, 0x00600408);
3804 WREG32(RLC_LB_CNTL, 0x80000004);
3805
3806 WREG32(RLC_MC_CNTL, 0);
3807 WREG32(RLC_UCODE_CNTL, 0);
3808
3809 fw_data = (const __be32 *)rdev->rlc_fw->data;
3810 WREG32(RLC_GPM_UCODE_ADDR, 0);
3811 for (i = 0; i < size; i++)
3812 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
3813 WREG32(RLC_GPM_UCODE_ADDR, 0);
3814
3815 /* XXX */
3816 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
3817 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
3818 clear_state_info[2] = 0;//cik_default_size;
3819 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
3820 for (i = 0; i < 3; i++)
3821 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
3822 WREG32(RLC_DRIVER_DMA_STATUS, 0);
3823
3824 cik_rlc_start(rdev);
3825
3826 return 0;
3827}
Alex Deuchera59781b2012-11-09 10:45:57 -05003828
3829/*
3830 * Interrupts
3831 * Starting with r6xx, interrupts are handled via a ring buffer.
3832 * Ring buffers are areas of GPU accessible memory that the GPU
3833 * writes interrupt vectors into and the host reads vectors out of.
3834 * There is a rptr (read pointer) that determines where the
3835 * host is currently reading, and a wptr (write pointer)
3836 * which determines where the GPU has written. When the
3837 * pointers are equal, the ring is idle. When the GPU
3838 * writes vectors to the ring buffer, it increments the
3839 * wptr. When there is an interrupt, the host then starts
 3840 * fetching vectors and processing them until the pointers are
 3841 * equal again, at which point it updates the rptr.
3842 */
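
/* Illustrative only, not driver code: with both pointers kept in bytes and
 * masked to the (power of two) ring size, the amount of unread data is
 *
 *	pending = (wptr - rptr) & rdev->ih.ptr_mask;
 *
 * and the ring is idle when pending == 0, which is exactly the
 * "rptr != wptr" loop condition used in cik_irq_process() below.
 */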
3843
3844/**
3845 * cik_enable_interrupts - Enable the interrupt ring buffer
3846 *
3847 * @rdev: radeon_device pointer
3848 *
3849 * Enable the interrupt ring buffer (CIK).
3850 */
3851static void cik_enable_interrupts(struct radeon_device *rdev)
3852{
3853 u32 ih_cntl = RREG32(IH_CNTL);
3854 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3855
3856 ih_cntl |= ENABLE_INTR;
3857 ih_rb_cntl |= IH_RB_ENABLE;
3858 WREG32(IH_CNTL, ih_cntl);
3859 WREG32(IH_RB_CNTL, ih_rb_cntl);
3860 rdev->ih.enabled = true;
3861}
3862
3863/**
3864 * cik_disable_interrupts - Disable the interrupt ring buffer
3865 *
3866 * @rdev: radeon_device pointer
3867 *
3868 * Disable the interrupt ring buffer (CIK).
3869 */
3870static void cik_disable_interrupts(struct radeon_device *rdev)
3871{
3872 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3873 u32 ih_cntl = RREG32(IH_CNTL);
3874
3875 ih_rb_cntl &= ~IH_RB_ENABLE;
3876 ih_cntl &= ~ENABLE_INTR;
3877 WREG32(IH_RB_CNTL, ih_rb_cntl);
3878 WREG32(IH_CNTL, ih_cntl);
3879 /* set rptr, wptr to 0 */
3880 WREG32(IH_RB_RPTR, 0);
3881 WREG32(IH_RB_WPTR, 0);
3882 rdev->ih.enabled = false;
3883 rdev->ih.rptr = 0;
3884}
3885
3886/**
3887 * cik_disable_interrupt_state - Disable all interrupt sources
3888 *
3889 * @rdev: radeon_device pointer
3890 *
3891 * Clear all interrupt enable bits used by the driver (CIK).
3892 */
3893static void cik_disable_interrupt_state(struct radeon_device *rdev)
3894{
3895 u32 tmp;
3896
3897 /* gfx ring */
3898 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04003899 /* sdma */
3900 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3901 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3902 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3903 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05003904 /* compute queues */
3905 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
3906 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
3907 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
3908 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
3909 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
3910 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
3911 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
3912 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
3913 /* grbm */
3914 WREG32(GRBM_INT_CNTL, 0);
3915 /* vline/vblank, etc. */
3916 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3917 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3918 if (rdev->num_crtc >= 4) {
3919 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3920 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3921 }
3922 if (rdev->num_crtc >= 6) {
3923 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3924 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3925 }
3926
3927 /* dac hotplug */
3928 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
3929
3930 /* digital hotplug */
3931 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3932 WREG32(DC_HPD1_INT_CONTROL, tmp);
3933 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3934 WREG32(DC_HPD2_INT_CONTROL, tmp);
3935 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3936 WREG32(DC_HPD3_INT_CONTROL, tmp);
3937 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3938 WREG32(DC_HPD4_INT_CONTROL, tmp);
3939 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3940 WREG32(DC_HPD5_INT_CONTROL, tmp);
3941 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3942 WREG32(DC_HPD6_INT_CONTROL, tmp);
3943
3944}
3945
3946/**
3947 * cik_irq_init - init and enable the interrupt ring
3948 *
3949 * @rdev: radeon_device pointer
3950 *
3951 * Allocate a ring buffer for the interrupt controller,
 3952 * enable the RLC, disable interrupts, set up the IH
3953 * ring buffer and enable it (CIK).
 3954 * Called at device load and resume.
3955 * Returns 0 for success, errors for failure.
3956 */
3957static int cik_irq_init(struct radeon_device *rdev)
3958{
3959 int ret = 0;
3960 int rb_bufsz;
3961 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3962
3963 /* allocate ring */
3964 ret = r600_ih_ring_alloc(rdev);
3965 if (ret)
3966 return ret;
3967
3968 /* disable irqs */
3969 cik_disable_interrupts(rdev);
3970
3971 /* init rlc */
3972 ret = cik_rlc_resume(rdev);
3973 if (ret) {
3974 r600_ih_ring_fini(rdev);
3975 return ret;
3976 }
3977
3978 /* setup interrupt control */
3979 /* XXX this should actually be a bus address, not an MC address. same on older asics */
3980 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3981 interrupt_cntl = RREG32(INTERRUPT_CNTL);
3982 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3983 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3984 */
3985 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3986 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3987 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3988 WREG32(INTERRUPT_CNTL, interrupt_cntl);
3989
3990 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3991 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3992
3993 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3994 IH_WPTR_OVERFLOW_CLEAR |
3995 (rb_bufsz << 1));
3996
3997 if (rdev->wb.enabled)
3998 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3999
4000 /* set the writeback address whether it's enabled or not */
4001 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4002 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4003
4004 WREG32(IH_RB_CNTL, ih_rb_cntl);
4005
4006 /* set rptr, wptr to 0 */
4007 WREG32(IH_RB_RPTR, 0);
4008 WREG32(IH_RB_WPTR, 0);
4009
4010 /* Default settings for IH_CNTL (disabled at first) */
4011 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4012 /* RPTR_REARM only works if msi's are enabled */
4013 if (rdev->msi_enabled)
4014 ih_cntl |= RPTR_REARM;
4015 WREG32(IH_CNTL, ih_cntl);
4016
4017 /* force the active interrupt state to all disabled */
4018 cik_disable_interrupt_state(rdev);
4019
4020 pci_set_master(rdev->pdev);
4021
4022 /* enable irqs */
4023 cik_enable_interrupts(rdev);
4024
4025 return ret;
4026}
4027
4028/**
4029 * cik_irq_set - enable/disable interrupt sources
4030 *
4031 * @rdev: radeon_device pointer
4032 *
4033 * Enable interrupt sources on the GPU (vblanks, hpd,
4034 * etc.) (CIK).
4035 * Returns 0 for success, errors for failure.
4036 */
4037int cik_irq_set(struct radeon_device *rdev)
4038{
4039 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
4040 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
4041 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4042 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4043 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04004044 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05004045
4046 if (!rdev->irq.installed) {
4047 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4048 return -EINVAL;
4049 }
4050 /* don't enable anything if the ih is disabled */
4051 if (!rdev->ih.enabled) {
4052 cik_disable_interrupts(rdev);
4053 /* force the active interrupt state to all disabled */
4054 cik_disable_interrupt_state(rdev);
4055 return 0;
4056 }
4057
4058 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4059 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4060 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4061 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4062 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4063 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4064
Alex Deucher21a93e12013-04-09 12:47:11 -04004065 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4066 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4067
Alex Deuchera59781b2012-11-09 10:45:57 -05004068 /* enable CP interrupts on all rings */
4069 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4070 DRM_DEBUG("cik_irq_set: sw int gfx\n");
4071 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4072 }
4073 /* TODO: compute queues! */
4074 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
4075
Alex Deucher21a93e12013-04-09 12:47:11 -04004076 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4077 DRM_DEBUG("cik_irq_set: sw int dma\n");
4078 dma_cntl |= TRAP_ENABLE;
4079 }
4080
4081 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4082 DRM_DEBUG("cik_irq_set: sw int dma1\n");
4083 dma_cntl1 |= TRAP_ENABLE;
4084 }
4085
Alex Deuchera59781b2012-11-09 10:45:57 -05004086 if (rdev->irq.crtc_vblank_int[0] ||
4087 atomic_read(&rdev->irq.pflip[0])) {
4088 DRM_DEBUG("cik_irq_set: vblank 0\n");
4089 crtc1 |= VBLANK_INTERRUPT_MASK;
4090 }
4091 if (rdev->irq.crtc_vblank_int[1] ||
4092 atomic_read(&rdev->irq.pflip[1])) {
4093 DRM_DEBUG("cik_irq_set: vblank 1\n");
4094 crtc2 |= VBLANK_INTERRUPT_MASK;
4095 }
4096 if (rdev->irq.crtc_vblank_int[2] ||
4097 atomic_read(&rdev->irq.pflip[2])) {
4098 DRM_DEBUG("cik_irq_set: vblank 2\n");
4099 crtc3 |= VBLANK_INTERRUPT_MASK;
4100 }
4101 if (rdev->irq.crtc_vblank_int[3] ||
4102 atomic_read(&rdev->irq.pflip[3])) {
4103 DRM_DEBUG("cik_irq_set: vblank 3\n");
4104 crtc4 |= VBLANK_INTERRUPT_MASK;
4105 }
4106 if (rdev->irq.crtc_vblank_int[4] ||
4107 atomic_read(&rdev->irq.pflip[4])) {
4108 DRM_DEBUG("cik_irq_set: vblank 4\n");
4109 crtc5 |= VBLANK_INTERRUPT_MASK;
4110 }
4111 if (rdev->irq.crtc_vblank_int[5] ||
4112 atomic_read(&rdev->irq.pflip[5])) {
4113 DRM_DEBUG("cik_irq_set: vblank 5\n");
4114 crtc6 |= VBLANK_INTERRUPT_MASK;
4115 }
4116 if (rdev->irq.hpd[0]) {
4117 DRM_DEBUG("cik_irq_set: hpd 1\n");
4118 hpd1 |= DC_HPDx_INT_EN;
4119 }
4120 if (rdev->irq.hpd[1]) {
4121 DRM_DEBUG("cik_irq_set: hpd 2\n");
4122 hpd2 |= DC_HPDx_INT_EN;
4123 }
4124 if (rdev->irq.hpd[2]) {
4125 DRM_DEBUG("cik_irq_set: hpd 3\n");
4126 hpd3 |= DC_HPDx_INT_EN;
4127 }
4128 if (rdev->irq.hpd[3]) {
4129 DRM_DEBUG("cik_irq_set: hpd 4\n");
4130 hpd4 |= DC_HPDx_INT_EN;
4131 }
4132 if (rdev->irq.hpd[4]) {
4133 DRM_DEBUG("cik_irq_set: hpd 5\n");
4134 hpd5 |= DC_HPDx_INT_EN;
4135 }
4136 if (rdev->irq.hpd[5]) {
4137 DRM_DEBUG("cik_irq_set: hpd 6\n");
4138 hpd6 |= DC_HPDx_INT_EN;
4139 }
4140
4141 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4142
Alex Deucher21a93e12013-04-09 12:47:11 -04004143 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4144 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4145
Alex Deuchera59781b2012-11-09 10:45:57 -05004146 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4147
4148 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4149 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4150 if (rdev->num_crtc >= 4) {
4151 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4152 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4153 }
4154 if (rdev->num_crtc >= 6) {
4155 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4156 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4157 }
4158
4159 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4160 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4161 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4162 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4163 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4164 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4165
4166 return 0;
4167}
4168
4169/**
4170 * cik_irq_ack - ack interrupt sources
4171 *
4172 * @rdev: radeon_device pointer
4173 *
4174 * Ack interrupt sources on the GPU (vblanks, hpd,
4175 * etc.) (CIK). Certain interrupts sources are sw
4176 * generated and do not require an explicit ack.
4177 */
4178static inline void cik_irq_ack(struct radeon_device *rdev)
4179{
4180 u32 tmp;
4181
4182 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4183 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4184 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4185 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4186 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4187 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4188 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4189
4190 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4191 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4192 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4193 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4194 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4195 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4196 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4197 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4198
4199 if (rdev->num_crtc >= 4) {
4200 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4201 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4202 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4203 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4204 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4205 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4206 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4207 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4208 }
4209
4210 if (rdev->num_crtc >= 6) {
4211 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4212 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4213 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4214 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4215 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4216 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4217 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4218 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4219 }
4220
4221 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4222 tmp = RREG32(DC_HPD1_INT_CONTROL);
4223 tmp |= DC_HPDx_INT_ACK;
4224 WREG32(DC_HPD1_INT_CONTROL, tmp);
4225 }
4226 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4227 tmp = RREG32(DC_HPD2_INT_CONTROL);
4228 tmp |= DC_HPDx_INT_ACK;
4229 WREG32(DC_HPD2_INT_CONTROL, tmp);
4230 }
4231 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4232 tmp = RREG32(DC_HPD3_INT_CONTROL);
4233 tmp |= DC_HPDx_INT_ACK;
4234 WREG32(DC_HPD3_INT_CONTROL, tmp);
4235 }
4236 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4237 tmp = RREG32(DC_HPD4_INT_CONTROL);
4238 tmp |= DC_HPDx_INT_ACK;
4239 WREG32(DC_HPD4_INT_CONTROL, tmp);
4240 }
4241 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4242 tmp = RREG32(DC_HPD5_INT_CONTROL);
4243 tmp |= DC_HPDx_INT_ACK;
4244 WREG32(DC_HPD5_INT_CONTROL, tmp);
4245 }
4246 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
 4247 		tmp = RREG32(DC_HPD6_INT_CONTROL);
4248 tmp |= DC_HPDx_INT_ACK;
4249 WREG32(DC_HPD6_INT_CONTROL, tmp);
4250 }
4251}
4252
4253/**
4254 * cik_irq_disable - disable interrupts
4255 *
4256 * @rdev: radeon_device pointer
4257 *
4258 * Disable interrupts on the hw (CIK).
4259 */
4260static void cik_irq_disable(struct radeon_device *rdev)
4261{
4262 cik_disable_interrupts(rdev);
4263 /* Wait and acknowledge irq */
4264 mdelay(1);
4265 cik_irq_ack(rdev);
4266 cik_disable_interrupt_state(rdev);
4267}
4268
4269/**
 4270 * cik_irq_suspend - disable interrupts for suspend
4271 *
4272 * @rdev: radeon_device pointer
4273 *
4274 * Disable interrupts and stop the RLC (CIK).
4275 * Used for suspend.
4276 */
4277static void cik_irq_suspend(struct radeon_device *rdev)
4278{
4279 cik_irq_disable(rdev);
4280 cik_rlc_stop(rdev);
4281}
4282
4283/**
4284 * cik_irq_fini - tear down interrupt support
4285 *
4286 * @rdev: radeon_device pointer
4287 *
4288 * Disable interrupts on the hw and free the IH ring
4289 * buffer (CIK).
4290 * Used for driver unload.
4291 */
4292static void cik_irq_fini(struct radeon_device *rdev)
4293{
4294 cik_irq_suspend(rdev);
4295 r600_ih_ring_fini(rdev);
4296}
4297
4298/**
4299 * cik_get_ih_wptr - get the IH ring buffer wptr
4300 *
4301 * @rdev: radeon_device pointer
4302 *
4303 * Get the IH ring buffer wptr from either the register
4304 * or the writeback memory buffer (CIK). Also check for
4305 * ring buffer overflow and deal with it.
4306 * Used by cik_irq_process().
4307 * Returns the value of the wptr.
4308 */
4309static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
4310{
4311 u32 wptr, tmp;
4312
4313 if (rdev->wb.enabled)
4314 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4315 else
4316 wptr = RREG32(IH_RB_WPTR);
4317
4318 if (wptr & RB_OVERFLOW) {
 4319 		/* When a ring buffer overflow happens, start parsing interrupts
 4320 		 * from the last vector that was not overwritten (wptr + 16). Hopefully
 4321 		 * this should allow us to catch up.
4322 */
4323 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
 4324 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
4325 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4326 tmp = RREG32(IH_RB_CNTL);
4327 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4328 WREG32(IH_RB_CNTL, tmp);
4329 }
4330 return (wptr & rdev->ih.ptr_mask);
4331}
4332
4333/* CIK IV Ring
4334 * Each IV ring entry is 128 bits:
4335 * [7:0] - interrupt source id
4336 * [31:8] - reserved
4337 * [59:32] - interrupt source data
4338 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04004339 * [71:64] - RINGID
4340 * CP:
4341 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05004342 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
4343 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
4344 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
4345 * PIPE_ID - ME0 0=3D
4346 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04004347 * SDMA:
4348 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
4349 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
4350 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05004351 * [79:72] - VMID
4352 * [95:80] - PASID
4353 * [127:96] - reserved
4354 */
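/* Illustrative only, not driver code: given ring_index as used in
 * cik_irq_process() below, the fields above unpack as
 *
 *	src_id   = le32_to_cpu(rdev->ih.ring[ring_index + 0]) & 0xff;
 *	src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
 *	vmid     = (le32_to_cpu(rdev->ih.ring[ring_index + 2]) >> 8) & 0xff;
 *	pasid    = le32_to_cpu(rdev->ih.ring[ring_index + 2]) >> 16;
 *
 * only the first three are consumed by the handler today.
 */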
4355/**
4356 * cik_irq_process - interrupt handler
4357 *
4358 * @rdev: radeon_device pointer
4359 *
 4360 * Interrupt handler (CIK). Walk the IH ring,
4361 * ack interrupts and schedule work to handle
4362 * interrupt events.
4363 * Returns irq process return code.
4364 */
4365int cik_irq_process(struct radeon_device *rdev)
4366{
4367 u32 wptr;
4368 u32 rptr;
4369 u32 src_id, src_data, ring_id;
4370 u8 me_id, pipe_id, queue_id;
4371 u32 ring_index;
4372 bool queue_hotplug = false;
4373 bool queue_reset = false;
4374
4375 if (!rdev->ih.enabled || rdev->shutdown)
4376 return IRQ_NONE;
4377
4378 wptr = cik_get_ih_wptr(rdev);
4379
4380restart_ih:
4381 /* is somebody else already processing irqs? */
4382 if (atomic_xchg(&rdev->ih.lock, 1))
4383 return IRQ_NONE;
4384
4385 rptr = rdev->ih.rptr;
4386 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4387
4388 /* Order reading of wptr vs. reading of IH ring data */
4389 rmb();
4390
4391 /* display interrupts */
4392 cik_irq_ack(rdev);
4393
4394 while (rptr != wptr) {
4395 /* wptr/rptr are in bytes! */
4396 ring_index = rptr / 4;
4397 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4398 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4399 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05004400
4401 switch (src_id) {
4402 case 1: /* D1 vblank/vline */
4403 switch (src_data) {
4404 case 0: /* D1 vblank */
4405 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
4406 if (rdev->irq.crtc_vblank_int[0]) {
4407 drm_handle_vblank(rdev->ddev, 0);
4408 rdev->pm.vblank_sync = true;
4409 wake_up(&rdev->irq.vblank_queue);
4410 }
4411 if (atomic_read(&rdev->irq.pflip[0]))
4412 radeon_crtc_handle_flip(rdev, 0);
4413 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4414 DRM_DEBUG("IH: D1 vblank\n");
4415 }
4416 break;
4417 case 1: /* D1 vline */
4418 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
4419 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4420 DRM_DEBUG("IH: D1 vline\n");
4421 }
4422 break;
4423 default:
4424 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4425 break;
4426 }
4427 break;
4428 case 2: /* D2 vblank/vline */
4429 switch (src_data) {
4430 case 0: /* D2 vblank */
4431 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4432 if (rdev->irq.crtc_vblank_int[1]) {
4433 drm_handle_vblank(rdev->ddev, 1);
4434 rdev->pm.vblank_sync = true;
4435 wake_up(&rdev->irq.vblank_queue);
4436 }
4437 if (atomic_read(&rdev->irq.pflip[1]))
4438 radeon_crtc_handle_flip(rdev, 1);
4439 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4440 DRM_DEBUG("IH: D2 vblank\n");
4441 }
4442 break;
4443 case 1: /* D2 vline */
4444 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4445 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4446 DRM_DEBUG("IH: D2 vline\n");
4447 }
4448 break;
4449 default:
4450 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4451 break;
4452 }
4453 break;
4454 case 3: /* D3 vblank/vline */
4455 switch (src_data) {
4456 case 0: /* D3 vblank */
4457 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4458 if (rdev->irq.crtc_vblank_int[2]) {
4459 drm_handle_vblank(rdev->ddev, 2);
4460 rdev->pm.vblank_sync = true;
4461 wake_up(&rdev->irq.vblank_queue);
4462 }
4463 if (atomic_read(&rdev->irq.pflip[2]))
4464 radeon_crtc_handle_flip(rdev, 2);
4465 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4466 DRM_DEBUG("IH: D3 vblank\n");
4467 }
4468 break;
4469 case 1: /* D3 vline */
4470 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4471 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4472 DRM_DEBUG("IH: D3 vline\n");
4473 }
4474 break;
4475 default:
4476 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4477 break;
4478 }
4479 break;
4480 case 4: /* D4 vblank/vline */
4481 switch (src_data) {
4482 case 0: /* D4 vblank */
4483 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4484 if (rdev->irq.crtc_vblank_int[3]) {
4485 drm_handle_vblank(rdev->ddev, 3);
4486 rdev->pm.vblank_sync = true;
4487 wake_up(&rdev->irq.vblank_queue);
4488 }
4489 if (atomic_read(&rdev->irq.pflip[3]))
4490 radeon_crtc_handle_flip(rdev, 3);
4491 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4492 DRM_DEBUG("IH: D4 vblank\n");
4493 }
4494 break;
4495 case 1: /* D4 vline */
4496 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4497 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4498 DRM_DEBUG("IH: D4 vline\n");
4499 }
4500 break;
4501 default:
4502 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4503 break;
4504 }
4505 break;
4506 case 5: /* D5 vblank/vline */
4507 switch (src_data) {
4508 case 0: /* D5 vblank */
4509 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4510 if (rdev->irq.crtc_vblank_int[4]) {
4511 drm_handle_vblank(rdev->ddev, 4);
4512 rdev->pm.vblank_sync = true;
4513 wake_up(&rdev->irq.vblank_queue);
4514 }
4515 if (atomic_read(&rdev->irq.pflip[4]))
4516 radeon_crtc_handle_flip(rdev, 4);
4517 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4518 DRM_DEBUG("IH: D5 vblank\n");
4519 }
4520 break;
4521 case 1: /* D5 vline */
4522 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4523 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4524 DRM_DEBUG("IH: D5 vline\n");
4525 }
4526 break;
4527 default:
4528 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4529 break;
4530 }
4531 break;
4532 case 6: /* D6 vblank/vline */
4533 switch (src_data) {
4534 case 0: /* D6 vblank */
4535 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4536 if (rdev->irq.crtc_vblank_int[5]) {
4537 drm_handle_vblank(rdev->ddev, 5);
4538 rdev->pm.vblank_sync = true;
4539 wake_up(&rdev->irq.vblank_queue);
4540 }
4541 if (atomic_read(&rdev->irq.pflip[5]))
4542 radeon_crtc_handle_flip(rdev, 5);
4543 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4544 DRM_DEBUG("IH: D6 vblank\n");
4545 }
4546 break;
4547 case 1: /* D6 vline */
4548 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4549 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4550 DRM_DEBUG("IH: D6 vline\n");
4551 }
4552 break;
4553 default:
4554 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4555 break;
4556 }
4557 break;
4558 case 42: /* HPD hotplug */
4559 switch (src_data) {
4560 case 0:
4561 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4562 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
4563 queue_hotplug = true;
4564 DRM_DEBUG("IH: HPD1\n");
4565 }
4566 break;
4567 case 1:
4568 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4569 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4570 queue_hotplug = true;
4571 DRM_DEBUG("IH: HPD2\n");
4572 }
4573 break;
4574 case 2:
4575 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4576 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4577 queue_hotplug = true;
4578 DRM_DEBUG("IH: HPD3\n");
4579 }
4580 break;
4581 case 3:
4582 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4583 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4584 queue_hotplug = true;
4585 DRM_DEBUG("IH: HPD4\n");
4586 }
4587 break;
4588 case 4:
4589 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4590 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4591 queue_hotplug = true;
4592 DRM_DEBUG("IH: HPD5\n");
4593 }
4594 break;
4595 case 5:
4596 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4597 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4598 queue_hotplug = true;
4599 DRM_DEBUG("IH: HPD6\n");
4600 }
4601 break;
4602 default:
4603 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4604 break;
4605 }
4606 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04004607 case 146:
4608 case 147:
4609 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4610 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4611 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4612 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4613 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4614 /* reset addr and status */
4615 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4616 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05004617 case 176: /* GFX RB CP_INT */
4618 case 177: /* GFX IB CP_INT */
4619 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4620 break;
4621 case 181: /* CP EOP event */
4622 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04004623 /* XXX check the bitfield order! */
4624 me_id = (ring_id & 0x60) >> 5;
4625 pipe_id = (ring_id & 0x18) >> 3;
4626 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05004627 switch (me_id) {
4628 case 0:
4629 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4630 break;
4631 case 1:
4632 /* XXX compute */
4633 break;
4634 case 2:
4635 /* XXX compute */
4636 break;
4637 }
4638 break;
4639 case 184: /* CP Privileged reg access */
4640 DRM_ERROR("Illegal register access in command stream\n");
4641 /* XXX check the bitfield order! */
4642 me_id = (ring_id & 0x60) >> 5;
4643 pipe_id = (ring_id & 0x18) >> 3;
4644 queue_id = (ring_id & 0x7) >> 0;
4645 switch (me_id) {
4646 case 0:
4647 /* This results in a full GPU reset, but all we need to do is soft
4648 * reset the CP for gfx
4649 */
4650 queue_reset = true;
4651 break;
4652 case 1:
4653 /* XXX compute */
4654 break;
4655 case 2:
4656 /* XXX compute */
4657 break;
4658 }
4659 break;
4660 case 185: /* CP Privileged inst */
4661 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04004662 /* XXX check the bitfield order! */
4663 me_id = (ring_id & 0x60) >> 5;
4664 pipe_id = (ring_id & 0x18) >> 3;
4665 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05004666 switch (me_id) {
4667 case 0:
4668 /* This results in a full GPU reset, but all we need to do is soft
4669 * reset the CP for gfx
4670 */
4671 queue_reset = true;
4672 break;
4673 case 1:
4674 /* XXX compute */
4675 break;
4676 case 2:
4677 /* XXX compute */
4678 break;
4679 }
4680 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04004681 case 224: /* SDMA trap event */
4682 /* XXX check the bitfield order! */
4683 me_id = (ring_id & 0x3) >> 0;
4684 queue_id = (ring_id & 0xc) >> 2;
4685 DRM_DEBUG("IH: SDMA trap\n");
4686 switch (me_id) {
4687 case 0:
4688 switch (queue_id) {
4689 case 0:
4690 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4691 break;
4692 case 1:
4693 /* XXX compute */
4694 break;
4695 case 2:
4696 /* XXX compute */
4697 break;
4698 }
4699 break;
4700 case 1:
4701 switch (queue_id) {
4702 case 0:
4703 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4704 break;
4705 case 1:
4706 /* XXX compute */
4707 break;
4708 case 2:
4709 /* XXX compute */
4710 break;
4711 }
4712 break;
4713 }
4714 break;
4715 case 241: /* SDMA Privileged inst */
4716 case 247: /* SDMA Privileged inst */
4717 DRM_ERROR("Illegal instruction in SDMA command stream\n");
4718 /* XXX check the bitfield order! */
4719 me_id = (ring_id & 0x3) >> 0;
4720 queue_id = (ring_id & 0xc) >> 2;
4721 switch (me_id) {
4722 case 0:
4723 switch (queue_id) {
4724 case 0:
4725 queue_reset = true;
4726 break;
4727 case 1:
4728 /* XXX compute */
4729 queue_reset = true;
4730 break;
4731 case 2:
4732 /* XXX compute */
4733 queue_reset = true;
4734 break;
4735 }
4736 break;
4737 case 1:
4738 switch (queue_id) {
4739 case 0:
4740 queue_reset = true;
4741 break;
4742 case 1:
4743 /* XXX compute */
4744 queue_reset = true;
4745 break;
4746 case 2:
4747 /* XXX compute */
4748 queue_reset = true;
4749 break;
4750 }
4751 break;
4752 }
4753 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05004754 case 233: /* GUI IDLE */
4755 DRM_DEBUG("IH: GUI idle\n");
4756 break;
4757 default:
4758 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4759 break;
4760 }
4761
4762 /* wptr/rptr are in bytes! */
4763 rptr += 16;
4764 rptr &= rdev->ih.ptr_mask;
4765 }
4766 if (queue_hotplug)
4767 schedule_work(&rdev->hotplug_work);
4768 if (queue_reset)
4769 schedule_work(&rdev->reset_work);
4770 rdev->ih.rptr = rptr;
4771 WREG32(IH_RB_RPTR, rdev->ih.rptr);
4772 atomic_set(&rdev->ih.lock, 0);
4773
4774 /* make sure wptr hasn't changed while processing */
4775 wptr = cik_get_ih_wptr(rdev);
4776 if (wptr != rptr)
4777 goto restart_ih;
4778
4779 return IRQ_HANDLED;
4780}
Alex Deucher7bf94a22012-08-17 11:48:29 -04004781
4782/*
4783 * startup/shutdown callbacks
4784 */
4785/**
4786 * cik_startup - program the asic to a functional state
4787 *
4788 * @rdev: radeon_device pointer
4789 *
4790 * Programs the asic to a functional state (CIK).
4791 * Called by cik_init() and cik_resume().
4792 * Returns 0 for success, error for failure.
4793 */
4794static int cik_startup(struct radeon_device *rdev)
4795{
4796 struct radeon_ring *ring;
4797 int r;
4798
4799 if (rdev->flags & RADEON_IS_IGP) {
4800 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4801 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
4802 r = cik_init_microcode(rdev);
4803 if (r) {
4804 DRM_ERROR("Failed to load firmware!\n");
4805 return r;
4806 }
4807 }
4808 } else {
4809 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4810 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
4811 !rdev->mc_fw) {
4812 r = cik_init_microcode(rdev);
4813 if (r) {
4814 DRM_ERROR("Failed to load firmware!\n");
4815 return r;
4816 }
4817 }
4818
4819 r = ci_mc_load_microcode(rdev);
4820 if (r) {
4821 DRM_ERROR("Failed to load MC firmware!\n");
4822 return r;
4823 }
4824 }
4825
4826 r = r600_vram_scratch_init(rdev);
4827 if (r)
4828 return r;
4829
4830 cik_mc_program(rdev);
4831 r = cik_pcie_gart_enable(rdev);
4832 if (r)
4833 return r;
4834 cik_gpu_init(rdev);
4835
4836 /* allocate rlc buffers */
4837 r = si_rlc_init(rdev);
4838 if (r) {
4839 DRM_ERROR("Failed to init rlc BOs!\n");
4840 return r;
4841 }
4842
4843 /* allocate wb buffer */
4844 r = radeon_wb_init(rdev);
4845 if (r)
4846 return r;
4847
4848 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4849 if (r) {
4850 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4851 return r;
4852 }
4853
4854 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4855 if (r) {
4856 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4857 return r;
4858 }
4859
4860 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4861 if (r) {
4862 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4863 return r;
4864 }
4865
4866 /* Enable IRQ */
4867 if (!rdev->irq.installed) {
4868 r = radeon_irq_kms_init(rdev);
4869 if (r)
4870 return r;
4871 }
4872
4873 r = cik_irq_init(rdev);
4874 if (r) {
4875 DRM_ERROR("radeon: IH init failed (%d).\n", r);
4876 radeon_irq_kms_fini(rdev);
4877 return r;
4878 }
4879 cik_irq_set(rdev);
4880
4881 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4882 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
4883 CP_RB0_RPTR, CP_RB0_WPTR,
4884 0, 0xfffff, RADEON_CP_PACKET2);
4885 if (r)
4886 return r;
4887
4888 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4889 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4890 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
4891 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
4892 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4893 if (r)
4894 return r;
4895
4896 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4897 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4898 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
4899 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
4900 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4901 if (r)
4902 return r;
4903
4904 r = cik_cp_resume(rdev);
4905 if (r)
4906 return r;
4907
4908 r = cik_sdma_resume(rdev);
4909 if (r)
4910 return r;
4911
4912 r = radeon_ib_pool_init(rdev);
4913 if (r) {
4914 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
4915 return r;
4916 }
4917
4918 r = radeon_vm_manager_init(rdev);
4919 if (r) {
4920 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
4921 return r;
4922 }
4923
4924 return 0;
4925}
4926
4927/**
4928 * cik_resume - resume the asic to a functional state
4929 *
4930 * @rdev: radeon_device pointer
4931 *
4932 * Programs the asic to a functional state (CIK).
4933 * Called at resume.
4934 * Returns 0 for success, error for failure.
4935 */
4936int cik_resume(struct radeon_device *rdev)
4937{
4938 int r;
4939
4940 /* post card */
4941 atom_asic_init(rdev->mode_info.atom_context);
4942
4943 rdev->accel_working = true;
4944 r = cik_startup(rdev);
4945 if (r) {
4946 DRM_ERROR("cik startup failed on resume\n");
4947 rdev->accel_working = false;
4948 return r;
4949 }
4950
4951 return r;
4952
4953}
4954
4955/**
4956 * cik_suspend - suspend the asic
4957 *
4958 * @rdev: radeon_device pointer
4959 *
4960 * Bring the chip into a state suitable for suspend (CIK).
4961 * Called at suspend.
4962 * Returns 0 for success.
4963 */
4964int cik_suspend(struct radeon_device *rdev)
4965{
4966 radeon_vm_manager_fini(rdev);
4967 cik_cp_enable(rdev, false);
4968 cik_sdma_enable(rdev, false);
4969 cik_irq_suspend(rdev);
4970 radeon_wb_disable(rdev);
4971 cik_pcie_gart_disable(rdev);
4972 return 0;
4973}
4974
4975/* Plan is to move initialization in that function and use
4976 * helper function so that radeon_device_init pretty much
4977 * do nothing more than calling asic specific function. This
4978 * should also allow to remove a bunch of callback function
4979 * like vram_info.
4980 */
4981/**
4982 * cik_init - asic specific driver and hw init
4983 *
4984 * @rdev: radeon_device pointer
4985 *
4986 * Setup asic specific driver variables and program the hw
4987 * to a functional state (CIK).
4988 * Called at driver startup.
4989 * Returns 0 for success, errors for failure.
4990 */
4991int cik_init(struct radeon_device *rdev)
4992{
4993 struct radeon_ring *ring;
4994 int r;
4995
4996 /* Read BIOS */
4997 if (!radeon_get_bios(rdev)) {
4998 if (ASIC_IS_AVIVO(rdev))
4999 return -EINVAL;
5000 }
5001 /* Must be an ATOMBIOS */
5002 if (!rdev->is_atom_bios) {
5003 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5004 return -EINVAL;
5005 }
5006 r = radeon_atombios_init(rdev);
5007 if (r)
5008 return r;
5009
5010 /* Post card if necessary */
5011 if (!radeon_card_posted(rdev)) {
5012 if (!rdev->bios) {
5013 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5014 return -EINVAL;
5015 }
5016 DRM_INFO("GPU not posted. posting now...\n");
5017 atom_asic_init(rdev->mode_info.atom_context);
5018 }
5019 /* Initialize scratch registers */
5020 cik_scratch_init(rdev);
5021 /* Initialize surface registers */
5022 radeon_surface_init(rdev);
5023 /* Initialize clocks */
5024 radeon_get_clock_info(rdev->ddev);
5025
5026 /* Fence driver */
5027 r = radeon_fence_driver_init(rdev);
5028 if (r)
5029 return r;
5030
5031 /* initialize memory controller */
5032 r = cik_mc_init(rdev);
5033 if (r)
5034 return r;
5035 /* Memory manager */
5036 r = radeon_bo_init(rdev);
5037 if (r)
5038 return r;
5039
5040 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5041 ring->ring_obj = NULL;
5042 r600_ring_init(rdev, ring, 1024 * 1024);
5043
5044 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5045 ring->ring_obj = NULL;
5046 r600_ring_init(rdev, ring, 256 * 1024);
5047
5048 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5049 ring->ring_obj = NULL;
5050 r600_ring_init(rdev, ring, 256 * 1024);
5051
5052 rdev->ih.ring_obj = NULL;
5053 r600_ih_ring_init(rdev, 64 * 1024);
5054
5055 r = r600_pcie_gart_init(rdev);
5056 if (r)
5057 return r;
5058
5059 rdev->accel_working = true;
5060 r = cik_startup(rdev);
5061 if (r) {
5062 dev_err(rdev->dev, "disabling GPU acceleration\n");
5063 cik_cp_fini(rdev);
5064 cik_sdma_fini(rdev);
5065 cik_irq_fini(rdev);
5066 si_rlc_fini(rdev);
5067 radeon_wb_fini(rdev);
5068 radeon_ib_pool_fini(rdev);
5069 radeon_vm_manager_fini(rdev);
5070 radeon_irq_kms_fini(rdev);
5071 cik_pcie_gart_fini(rdev);
5072 rdev->accel_working = false;
5073 }
5074
5075 /* Don't start up if the MC ucode is missing.
5076 * The default clocks and voltages before the MC ucode
 5077 	 * is loaded are not sufficient for advanced operations.
5078 */
5079 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
5080 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5081 return -EINVAL;
5082 }
5083
5084 return 0;
5085}
5086
5087/**
5088 * cik_fini - asic specific driver and hw fini
5089 *
5090 * @rdev: radeon_device pointer
5091 *
5092 * Tear down the asic specific driver variables and program the hw
5093 * to an idle state (CIK).
5094 * Called at driver unload.
5095 */
5096void cik_fini(struct radeon_device *rdev)
5097{
5098 cik_cp_fini(rdev);
5099 cik_sdma_fini(rdev);
5100 cik_irq_fini(rdev);
5101 si_rlc_fini(rdev);
5102 radeon_wb_fini(rdev);
5103 radeon_vm_manager_fini(rdev);
5104 radeon_ib_pool_fini(rdev);
5105 radeon_irq_kms_fini(rdev);
5106 cik_pcie_gart_fini(rdev);
5107 r600_vram_scratch_fini(rdev);
5108 radeon_gem_fini(rdev);
5109 radeon_fence_driver_fini(rdev);
5110 radeon_bo_fini(rdev);
5111 radeon_atombios_fini(rdev);
5112 kfree(rdev->bios);
5113 rdev->bios = NULL;
5114}
Alex Deuchercd84a272012-07-20 17:13:13 -04005115
5116/* display watermark setup */
5117/**
5118 * dce8_line_buffer_adjust - Set up the line buffer
5119 *
5120 * @rdev: radeon_device pointer
5121 * @radeon_crtc: the selected display controller
5122 * @mode: the current display mode on the selected display
5123 * controller
5124 *
5125 * Setup up the line buffer allocation for
5126 * the selected display controller (CIK).
5127 * Returns the line buffer size in pixels.
5128 */
5129static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5130 struct radeon_crtc *radeon_crtc,
5131 struct drm_display_mode *mode)
5132{
5133 u32 tmp;
5134
5135 /*
5136 * Line Buffer Setup
 5137 	 * There are 6 line buffers, one for each display controller.
5138 * There are 3 partitions per LB. Select the number of partitions
5139 * to enable based on the display width. For display widths larger
 5140 	 * than 4096, you need to use 2 display controllers and combine
5141 * them using the stereo blender.
5142 */
5143 if (radeon_crtc->base.enabled && mode) {
5144 if (mode->crtc_hdisplay < 1920)
5145 tmp = 1;
5146 else if (mode->crtc_hdisplay < 2560)
5147 tmp = 2;
5148 else if (mode->crtc_hdisplay < 4096)
5149 tmp = 0;
5150 else {
5151 DRM_DEBUG_KMS("Mode too big for LB!\n");
5152 tmp = 0;
5153 }
5154 } else
5155 tmp = 1;
5156
5157 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5158 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5159
5160 if (radeon_crtc->base.enabled && mode) {
5161 switch (tmp) {
5162 case 0:
5163 default:
5164 return 4096 * 2;
5165 case 1:
5166 return 1920 * 2;
5167 case 2:
5168 return 2560 * 2;
5169 }
5170 }
5171
5172 /* controller not enabled, so no lb used */
5173 return 0;
5174}
5175
5176/**
5177 * cik_get_number_of_dram_channels - get the number of dram channels
5178 *
5179 * @rdev: radeon_device pointer
5180 *
5181 * Look up the number of video ram channels (CIK).
5182 * Used for display watermark bandwidth calculations
5183 * Returns the number of dram channels
5184 */
5185static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5186{
5187 u32 tmp = RREG32(MC_SHARED_CHMAP);
5188
5189 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5190 case 0:
5191 default:
5192 return 1;
5193 case 1:
5194 return 2;
5195 case 2:
5196 return 4;
5197 case 3:
5198 return 8;
5199 case 4:
5200 return 3;
5201 case 5:
5202 return 6;
5203 case 6:
5204 return 10;
5205 case 7:
5206 return 12;
5207 case 8:
5208 return 16;
5209 }
5210}
5211
5212struct dce8_wm_params {
5213 u32 dram_channels; /* number of dram channels */
5214 u32 yclk; /* bandwidth per dram data pin in kHz */
5215 u32 sclk; /* engine clock in kHz */
5216 u32 disp_clk; /* display clock in kHz */
5217 u32 src_width; /* viewport width */
5218 u32 active_time; /* active display time in ns */
5219 u32 blank_time; /* blank time in ns */
5220 bool interlaced; /* mode is interlaced */
5221 fixed20_12 vsc; /* vertical scale ratio */
5222 u32 num_heads; /* number of active crtcs */
5223 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
5224 u32 lb_size; /* line buffer allocated to pipe */
5225 u32 vtaps; /* vertical scaler taps */
5226};
5227
5228/**
5229 * dce8_dram_bandwidth - get the dram bandwidth
5230 *
5231 * @wm: watermark calculation data
5232 *
5233 * Calculate the raw dram bandwidth (CIK).
5234 * Used for display watermark bandwidth calculations
5235 * Returns the dram bandwidth in MBytes/s
5236 */
5237static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
5238{
5239 /* Calculate raw DRAM Bandwidth */
5240 fixed20_12 dram_efficiency; /* 0.7 */
5241 fixed20_12 yclk, dram_channels, bandwidth;
5242 fixed20_12 a;
5243
5244 a.full = dfixed_const(1000);
5245 yclk.full = dfixed_const(wm->yclk);
5246 yclk.full = dfixed_div(yclk, a);
5247 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5248 a.full = dfixed_const(10);
5249 dram_efficiency.full = dfixed_const(7);
5250 dram_efficiency.full = dfixed_div(dram_efficiency, a);
5251 bandwidth.full = dfixed_mul(dram_channels, yclk);
5252 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
5253
5254 return dfixed_trunc(bandwidth);
5255}
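
/* Worked example (illustrative): with wm->yclk = 1000000 (a 1 GHz effective
 * memory clock expressed in kHz) and wm->dram_channels = 2, the calculation
 * above gives (2 * 4) * (1000000 / 1000) * 0.7 = 5600 MBytes/s of raw
 * DRAM bandwidth.
 */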
5256
5257/**
5258 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
5259 *
5260 * @wm: watermark calculation data
5261 *
5262 * Calculate the dram bandwidth used for display (CIK).
5263 * Used for display watermark bandwidth calculations
5264 * Returns the dram bandwidth for display in MBytes/s
5265 */
5266static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5267{
5268 /* Calculate DRAM Bandwidth and the part allocated to display. */
5269 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
5270 fixed20_12 yclk, dram_channels, bandwidth;
5271 fixed20_12 a;
5272
5273 a.full = dfixed_const(1000);
5274 yclk.full = dfixed_const(wm->yclk);
5275 yclk.full = dfixed_div(yclk, a);
5276 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5277 a.full = dfixed_const(10);
 5278 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
5279 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
5280 bandwidth.full = dfixed_mul(dram_channels, yclk);
5281 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
5282
5283 return dfixed_trunc(bandwidth);
5284}
5285
5286/**
5287 * dce8_data_return_bandwidth - get the data return bandwidth
5288 *
5289 * @wm: watermark calculation data
5290 *
5291 * Calculate the data return bandwidth used for display (CIK).
5292 * Used for display watermark bandwidth calculations
5293 * Returns the data return bandwidth in MBytes/s
5294 */
5295static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
5296{
5297 /* Calculate the display Data return Bandwidth */
5298 fixed20_12 return_efficiency; /* 0.8 */
5299 fixed20_12 sclk, bandwidth;
5300 fixed20_12 a;
5301
5302 a.full = dfixed_const(1000);
5303 sclk.full = dfixed_const(wm->sclk);
5304 sclk.full = dfixed_div(sclk, a);
5305 a.full = dfixed_const(10);
5306 return_efficiency.full = dfixed_const(8);
5307 return_efficiency.full = dfixed_div(return_efficiency, a);
5308 a.full = dfixed_const(32);
5309 bandwidth.full = dfixed_mul(a, sclk);
5310 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
5311
5312 return dfixed_trunc(bandwidth);
5313}
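
/*
 * Worked example: an 800 MHz engine clock gives
 *
 *   32 * (800000 / 1000) * 0.8 ~= 20480 MBytes/s
 *
 * of data return bandwidth.
 */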
5314
5315/**
5316 * dce8_dmif_request_bandwidth - get the dmif bandwidth
5317 *
5318 * @wm: watermark calculation data
5319 *
5320 * Calculate the dmif bandwidth used for display (CIK).
5321 * Used for display watermark bandwidth calculations
5322 * Returns the dmif bandwidth in MBytes/s
5323 */
5324static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
5325{
5326 /* Calculate the DMIF Request Bandwidth */
5327 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
5328 fixed20_12 disp_clk, bandwidth;
5329 fixed20_12 a, b;
5330
5331 a.full = dfixed_const(1000);
5332 disp_clk.full = dfixed_const(wm->disp_clk);
5333 disp_clk.full = dfixed_div(disp_clk, a);
5334 a.full = dfixed_const(32);
5335 b.full = dfixed_mul(a, disp_clk);
5336
5337 a.full = dfixed_const(10);
5338 disp_clk_request_efficiency.full = dfixed_const(8);
5339 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
5340
5341 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
5342
5343 return dfixed_trunc(bandwidth);
5344}
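
/*
 * Worked example: a 148.5 MHz display clock gives
 *
 *   32 * (148500 / 1000) * 0.8 ~= 3801 MBytes/s
 *
 * of DMIF request bandwidth.
 */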
5345
5346/**
5347 * dce8_available_bandwidth - get the min available bandwidth
5348 *
5349 * @wm: watermark calculation data
5350 *
5351 * Calculate the min available bandwidth used for display (CIK).
5352 * Used for display watermark bandwidth calculations
5353 * Returns the min available bandwidth in MBytes/s
5354 */
5355static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
5356{
5357	/* Calculate the available bandwidth. Display can use this temporarily but not sustain it on average. */
5358 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
5359 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
5360 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
5361
5362 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
5363}
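
/*
 * Worked example: with the numbers above this is
 * min(11200, 20480, 3801) ~= 3801 MBytes/s, i.e. the DMIF request rate
 * is the limiting factor in that hypothetical configuration.
 */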
5364
5365/**
5366 * dce8_average_bandwidth - get the average available bandwidth
5367 *
5368 * @wm: watermark calculation data
5369 *
5370 * Calculate the average available bandwidth used for display (CIK).
5371 * Used for display watermark bandwidth calculations
5372 * Returns the average available bandwidth in MBytes/s
5373 */
5374static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
5375{
5376 /* Calculate the display mode Average Bandwidth
5377 * DisplayMode should contain the source and destination dimensions,
5378 * timing, etc.
5379 */
5380 fixed20_12 bpp;
5381 fixed20_12 line_time;
5382 fixed20_12 src_width;
5383 fixed20_12 bandwidth;
5384 fixed20_12 a;
5385
5386 a.full = dfixed_const(1000);
5387 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
5388 line_time.full = dfixed_div(line_time, a);
5389 bpp.full = dfixed_const(wm->bytes_per_pixel);
5390 src_width.full = dfixed_const(wm->src_width);
5391 bandwidth.full = dfixed_mul(src_width, bpp);
5392 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
5393 bandwidth.full = dfixed_div(bandwidth, line_time);
5394
5395 return dfixed_trunc(bandwidth);
5396}
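
/*
 * Worked example: a 1920 pixel wide source at 4 bytes per pixel with no
 * vertical scaling and a 13200 ns line time (active_time + blank_time)
 * needs on average
 *
 *   1920 * 4 * 1.0 / 13.2 ~= 581 MBytes/s.
 */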
5397
5398/**
5399 * dce8_latency_watermark - get the latency watermark
5400 *
5401 * @wm: watermark calculation data
5402 *
5403 * Calculate the latency watermark (CIK).
5404 * Used for display watermark bandwidth calculations
5405 * Returns the latency watermark in ns
5406 */
5407static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
5408{
5409 /* First calculate the latency in ns */
5410 u32 mc_latency = 2000; /* 2000 ns. */
5411 u32 available_bandwidth = dce8_available_bandwidth(wm);
5412 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
5413 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
5414 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
5415 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
5416 (wm->num_heads * cursor_line_pair_return_time);
5417 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
5418 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
5419 u32 tmp, dmif_size = 12288;
5420 fixed20_12 a, b, c;
5421
5422 if (wm->num_heads == 0)
5423 return 0;
5424
5425 a.full = dfixed_const(2);
5426 b.full = dfixed_const(1);
5427 if ((wm->vsc.full > a.full) ||
5428 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
5429 (wm->vtaps >= 5) ||
5430 ((wm->vsc.full >= a.full) && wm->interlaced))
5431 max_src_lines_per_dst_line = 4;
5432 else
5433 max_src_lines_per_dst_line = 2;
5434
5435 a.full = dfixed_const(available_bandwidth);
5436 b.full = dfixed_const(wm->num_heads);
5437 a.full = dfixed_div(a, b);
5438
5439 b.full = dfixed_const(mc_latency + 512);
5440 c.full = dfixed_const(wm->disp_clk);
5441 b.full = dfixed_div(b, c);
5442
5443 c.full = dfixed_const(dmif_size);
5444 b.full = dfixed_div(c, b);
5445
5446 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
5447
5448 b.full = dfixed_const(1000);
5449 c.full = dfixed_const(wm->disp_clk);
5450 b.full = dfixed_div(c, b);
5451 c.full = dfixed_const(wm->bytes_per_pixel);
5452 b.full = dfixed_mul(b, c);
5453
5454 lb_fill_bw = min(tmp, dfixed_trunc(b));
5455
5456 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
5457 b.full = dfixed_const(1000);
5458 c.full = dfixed_const(lb_fill_bw);
5459 b.full = dfixed_div(c, b);
5460 a.full = dfixed_div(a, b);
5461 line_fill_time = dfixed_trunc(a);
5462
5463 if (line_fill_time < wm->active_time)
5464 return latency;
5465 else
5466 return latency + (line_fill_time - wm->active_time);
5467
5468}
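
/*
 * Worked example (single head, illustrative values): assuming
 * available_bandwidth = 3801 MBytes/s and disp_clk = 148500 kHz,
 *
 *   worst_chunk_return_time      = 512 * 8 * 1000 / 3801 = 1077 ns
 *   cursor_line_pair_return_time = 128 * 4 * 1000 / 3801 =  134 ns
 *   other_heads_data_return_time = 2 * 1077 + 1 * 134    = 2288 ns
 *   dc_latency                   = 40000000 / 148500     =  269 ns
 *   latency                      = 2000 + 2288 + 269     = 4557 ns
 *
 * which is returned as-is as long as the line buffer fill time stays
 * below the active display time; otherwise the shortfall is added on
 * top.
 */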
5469
5470/**
5471 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
5472 * average bandwidth against the dram bandwidth allocated to display
5473 *
5474 * @wm: watermark calculation data
5475 *
5476 * Check if the display average bandwidth fits in the display
5477 * dram bandwidth (CIK).
5478 * Used for display watermark bandwidth calculations
5479 * Returns true if the display fits, false if not.
5480 */
5481static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5482{
5483 if (dce8_average_bandwidth(wm) <=
5484 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
5485 return true;
5486 else
5487 return false;
5488}
5489
5490/**
5491 * dce8_average_bandwidth_vs_available_bandwidth - check
5492 * average and available bandwidth
5493 *
5494 * @wm: watermark calculation data
5495 *
5496 * Check if the display average bandwidth fits in the display
5497 * available bandwidth (CIK).
5498 * Used for display watermark bandwidth calculations
5499 * Returns true if the display fits, false if not.
5500 */
5501static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
5502{
5503 if (dce8_average_bandwidth(wm) <=
5504 (dce8_available_bandwidth(wm) / wm->num_heads))
5505 return true;
5506 else
5507 return false;
5508}
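
/*
 * Worked example: an average demand of ~581 MBytes/s easily fits both the
 * ~4800 MBytes/s display share of DRAM bandwidth and the ~3801 MBytes/s
 * available bandwidth for a single head, so neither check would force
 * high display priority in that hypothetical configuration.
 */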
5509
5510/**
5511 * dce8_check_latency_hiding - check latency hiding
5512 *
5513 * @wm: watermark calculation data
5514 *
5515 * Check latency hiding (CIK).
5516 * Used for display watermark bandwidth calculations
5517 * Returns true if the display fits, false if not.
5518 */
5519static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
5520{
5521 u32 lb_partitions = wm->lb_size / wm->src_width;
5522 u32 line_time = wm->active_time + wm->blank_time;
5523 u32 latency_tolerant_lines;
5524 u32 latency_hiding;
5525 fixed20_12 a;
5526
5527 a.full = dfixed_const(1);
5528 if (wm->vsc.full > a.full)
5529 latency_tolerant_lines = 1;
5530 else {
5531 if (lb_partitions <= (wm->vtaps + 1))
5532 latency_tolerant_lines = 1;
5533 else
5534 latency_tolerant_lines = 2;
5535 }
5536
5537 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
5538
5539 if (dce8_latency_watermark(wm) <= latency_hiding)
5540 return true;
5541 else
5542 return false;
5543}
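
/*
 * Worked example: with a 13200 ns line time and 1680 ns blank time, one
 * latency tolerant line gives 1 * 13200 + 1680 = 14880 ns of latency
 * hiding, comfortably above the ~4557 ns latency watermark computed
 * above, so the display fits.
 */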
5544
5545/**
5546 * dce8_program_watermarks - program display watermarks
5547 *
5548 * @rdev: radeon_device pointer
5549 * @radeon_crtc: the selected display controller
5550 * @lb_size: line buffer size
5551 * @num_heads: number of display controllers in use
5552 *
5553 * Calculate and program the display watermarks for the
5554 * selected display controller (CIK).
5555 */
5556static void dce8_program_watermarks(struct radeon_device *rdev,
5557 struct radeon_crtc *radeon_crtc,
5558 u32 lb_size, u32 num_heads)
5559{
5560 struct drm_display_mode *mode = &radeon_crtc->base.mode;
5561 struct dce8_wm_params wm;
5562 u32 pixel_period;
5563 u32 line_time = 0;
5564 u32 latency_watermark_a = 0, latency_watermark_b = 0;
5565 u32 tmp, wm_mask;
5566
5567 if (radeon_crtc->base.enabled && num_heads && mode) {
5568 pixel_period = 1000000 / (u32)mode->clock;
5569 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
5570
5571 wm.yclk = rdev->pm.current_mclk * 10;
5572 wm.sclk = rdev->pm.current_sclk * 10;
5573 wm.disp_clk = mode->clock;
5574 wm.src_width = mode->crtc_hdisplay;
5575 wm.active_time = mode->crtc_hdisplay * pixel_period;
5576 wm.blank_time = line_time - wm.active_time;
5577 wm.interlaced = false;
5578 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
5579 wm.interlaced = true;
5580 wm.vsc = radeon_crtc->vsc;
5581 wm.vtaps = 1;
5582 if (radeon_crtc->rmx_type != RMX_OFF)
5583 wm.vtaps = 2;
5584 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
5585 wm.lb_size = lb_size;
5586 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
5587 wm.num_heads = num_heads;
5588
5589 /* set for high clocks */
5590 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
5591 /* set for low clocks */
5592		/* XXX: should use the low mclk/sclk here; wm B currently uses the same clocks as wm A */
5593 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
5594
5595 /* possibly force display priority to high */
5596 /* should really do this at mode validation time... */
5597 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
5598 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
5599 !dce8_check_latency_hiding(&wm) ||
5600 (rdev->disp_priority == 2)) {
5601 DRM_DEBUG_KMS("force priority to high\n");
5602 }
5603 }
5604
5605 /* select wm A */
5606 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5607 tmp = wm_mask;
5608 tmp &= ~LATENCY_WATERMARK_MASK(3);
5609 tmp |= LATENCY_WATERMARK_MASK(1);
5610 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5611 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5612 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
5613 LATENCY_HIGH_WATERMARK(line_time)));
5614 /* select wm B */
5615 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5616 tmp &= ~LATENCY_WATERMARK_MASK(3);
5617 tmp |= LATENCY_WATERMARK_MASK(2);
5618 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5619 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5620 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
5621 LATENCY_HIGH_WATERMARK(line_time)));
5622 /* restore original selection */
5623 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
5624}
5625
5626/**
5627 * dce8_bandwidth_update - program display watermarks
5628 *
5629 * @rdev: radeon_device pointer
5630 *
5631 * Calculate and program the display watermarks and line
5632 * buffer allocation (CIK).
5633 */
5634void dce8_bandwidth_update(struct radeon_device *rdev)
5635{
5636 struct drm_display_mode *mode = NULL;
5637 u32 num_heads = 0, lb_size;
5638 int i;
5639
5640 radeon_update_display_priority(rdev);
5641
5642 for (i = 0; i < rdev->num_crtc; i++) {
5643 if (rdev->mode_info.crtcs[i]->base.enabled)
5644 num_heads++;
5645 }
5646 for (i = 0; i < rdev->num_crtc; i++) {
5647 mode = &rdev->mode_info.crtcs[i]->base.mode;
5648 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
5649 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
5650 }
5651}
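
/*
 * A sketch of how this is expected to be reached (assuming the usual
 * radeon asic callback wiring for CIK parts):
 *
 *   radeon_bandwidth_update(rdev)
 *     -> rdev->asic->display.bandwidth_update(rdev)
 *       -> dce8_bandwidth_update(rdev)
 *
 * i.e. it runs whenever the display code recalculates bandwidth after a
 * mode set or a display priority change.
 */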
Alex Deucher44fa3462012-12-18 22:17:00 -05005652
5653/**
5654 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
5655 *
5656 * @rdev: radeon_device pointer
5657 *
5658 * Fetches a GPU clock counter snapshot (CIK).
5659 * Returns the 64 bit clock counter snapshot.
5660 */
5661uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
5662{
5663 uint64_t clock;
5664
5665 mutex_lock(&rdev->gpu_clock_mutex);
5666 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5667 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5668 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5669 mutex_unlock(&rdev->gpu_clock_mutex);
5670 return clock;
5671}
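
/*
 * The capture write appears to latch both 32 bit halves so the combined
 * 64 bit value read under the mutex is consistent.  A minimal sketch of
 * measuring an interval in GPU clock ticks:
 *
 *   uint64_t start, ticks;
 *
 *   start = cik_get_gpu_clock_counter(rdev);
 *   ... run some work ...
 *   ticks = cik_get_gpu_clock_counter(rdev) - start;
 */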
5672