/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
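
/* Note: the ucode sizes above are counted in dwords, so the firmware
 * images requested from disk are expected to be 4x as many bytes (see
 * the *_req_size calculations in cik_init_microcode() below).
 */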

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
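
/* Each entry above is an (index, data) pair: the index is written to
 * MC_SEQ_IO_DEBUG_INDEX and the data to MC_SEQ_IO_DEBUG_DATA by the
 * register loop in ci_mc_load_microcode() below.
 */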

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only (re)load the MC ucode if the MC is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
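
/* The MC firmware image is a stream of big-endian dwords (hence the
 * __be32 pointer and be32_to_cpup() above); each dword is pushed into
 * MC_SEQ_SUP_PGM while the sequencer is held in its writable state.
 */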

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}

/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}
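
/* Callers typically bracket accesses to instanced registers like this
 * (see cik_setup_rb() below): select one SE/SH, access the per-instance
 * registers, then switch back to broadcast with
 * cik_select_se_sh(rdev, 0xffffffff, 0xffffffff) so that subsequent
 * writes reach all instances again.
 */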

/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}
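
/* For bit_width < 32 this is equivalent to ((1 << bit_width) - 1); the
 * loop form also yields 0xffffffff for bit_width == 32 without relying
 * on an undefined 32-bit shift.
 */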

/**
 * cik_get_rb_disabled - compute the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
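
/* enabled_rbs is consumed two bits per SH above: each 2-bit field
 * selects a RASTER_CONFIG_RB_MAP_* packing based on which of the RBs
 * behind that SH are still enabled.
 */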

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
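
	/* For example, a 4-pipe part like Bonaire stores 2 in bits 3:0 per
	 * the switch above; the remaining nibbles come from MC_ARB_RAMCFG
	 * (banks) and GB_ADDR_CONFIG (pipe interleave and row size).
	 */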

	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}

/*
 * GPU scratch register helper functions.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 * cik_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_ring_emit(struct radeon_device *rdev,
			 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
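
/* The 64-bit fence address is split across two dwords above: the low
 * dword must be 4-byte aligned (addr & 0xfffffffc) and only the low 16
 * bits of the upper half are carried alongside the DATA_SEL/INT_SEL
 * flags.
 */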

void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}

/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
1681
Alex Deucherfbc832c2012-07-20 14:41:35 -04001682/**
1683 * cik_ib_test - basic gfx ring IB test
1684 *
1685 * @rdev: radeon_device pointer
1686 * @ring: radeon_ring structure holding ring information
1687 *
1688 * Allocate an IB and execute it on the gfx ring (CIK).
1689 * Provides a basic gfx ring test to verify that IBs are working.
1690 * Returns 0 on success, error on failure.
1691 */
1692int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1693{
1694 struct radeon_ib ib;
1695 uint32_t scratch;
1696 uint32_t tmp = 0;
1697 unsigned i;
1698 int r;
1699
1700 r = radeon_scratch_get(rdev, &scratch);
1701 if (r) {
1702 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1703 return r;
1704 }
1705 WREG32(scratch, 0xCAFEDEAD);
1706 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1707 if (r) {
1708		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
1709		return r;
1710 }
1711 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1712 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1713 ib.ptr[2] = 0xDEADBEEF;
1714 ib.length_dw = 3;
1715 r = radeon_ib_schedule(rdev, &ib, NULL);
1716 if (r) {
1717 radeon_scratch_free(rdev, scratch);
1718 radeon_ib_free(rdev, &ib);
1719 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1720 return r;
1721 }
1722 r = radeon_fence_wait(ib.fence, false);
1723 if (r) {
1724		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
1725		return r;
1726 }
1727 for (i = 0; i < rdev->usec_timeout; i++) {
1728 tmp = RREG32(scratch);
1729 if (tmp == 0xDEADBEEF)
1730 break;
1731 DRM_UDELAY(1);
1732 }
1733 if (i < rdev->usec_timeout) {
1734 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1735 } else {
1736 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1737 scratch, tmp);
1738 r = -EINVAL;
1739 }
1740 radeon_scratch_free(rdev, scratch);
1741 radeon_ib_free(rdev, &ib);
1742 return r;
1743}
1744
1745/*
1746 * CP.
1747 * On CIK, gfx and compute now have independent command processors.
1748 *
1749 * GFX
1750 * Gfx consists of a single ring and can process both gfx jobs and
1751 * compute jobs. The gfx CP consists of three microengines (ME):
1752 * PFP - Pre-Fetch Parser
1753 * ME - Micro Engine
1754 * CE - Constant Engine
1755 * The PFP and ME make up what is considered the Drawing Engine (DE).
1756 * The CE is an asynchronous engine used for updating buffer descriptors
1757 * used by the DE so that they can be loaded into cache in parallel
1758 * while the DE is processing state update packets.
1759 *
1760 * Compute
1761 * The compute CP consists of two microengines (ME):
1762 * MEC1 - Compute MicroEngine 1
1763 * MEC2 - Compute MicroEngine 2
1764 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1765 * The queues are exposed to userspace and are programmed directly
1766 * by the compute runtime.
1767 */
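/*
 * Illustrative sketch only (not used by the driver): with the 2 MEC x
 * 4 pipe x 8 queue layout described above, a compute queue can be
 * identified by a flat index.  The helper name is hypothetical.
 */
static inline unsigned cik_compute_queue_index(unsigned mec, unsigned pipe,
					       unsigned queue)
{
	/* mec in [0,1], pipe in [0,3], queue in [0,7] -> index in [0,63] */
	return (mec * 4 + pipe) * 8 + queue;
}
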
1768/**
1769 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1770 *
1771 * @rdev: radeon_device pointer
1772 * @enable: enable or disable the MEs
1773 *
1774 * Halts or unhalts the gfx MEs.
1775 */
1776static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1777{
1778 if (enable)
1779 WREG32(CP_ME_CNTL, 0);
1780 else {
1781 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1782 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1783 }
1784 udelay(50);
1785}
1786
1787/**
1788 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1789 *
1790 * @rdev: radeon_device pointer
1791 *
1792 * Loads the gfx PFP, ME, and CE ucode.
1793 * Returns 0 for success, -EINVAL if the ucode is not available.
1794 */
1795static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1796{
1797 const __be32 *fw_data;
1798 int i;
1799
1800 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1801 return -EINVAL;
1802
1803 cik_cp_gfx_enable(rdev, false);
1804
1805 /* PFP */
1806 fw_data = (const __be32 *)rdev->pfp_fw->data;
1807 WREG32(CP_PFP_UCODE_ADDR, 0);
1808 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1809 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1810 WREG32(CP_PFP_UCODE_ADDR, 0);
1811
1812 /* CE */
1813 fw_data = (const __be32 *)rdev->ce_fw->data;
1814 WREG32(CP_CE_UCODE_ADDR, 0);
1815 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1816 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1817 WREG32(CP_CE_UCODE_ADDR, 0);
1818
1819 /* ME */
1820 fw_data = (const __be32 *)rdev->me_fw->data;
1821 WREG32(CP_ME_RAM_WADDR, 0);
1822 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1823 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1824 WREG32(CP_ME_RAM_WADDR, 0);
1825
1826 WREG32(CP_PFP_UCODE_ADDR, 0);
1827 WREG32(CP_CE_UCODE_ADDR, 0);
1828 WREG32(CP_ME_RAM_WADDR, 0);
1829 WREG32(CP_ME_RAM_RADDR, 0);
1830 return 0;
1831}
1832
1833/**
1834 * cik_cp_gfx_start - start the gfx ring
1835 *
1836 * @rdev: radeon_device pointer
1837 *
1838 * Enables the ring and loads the clear state context and other
1839 * packets required to init the ring.
1840 * Returns 0 for success, error for failure.
1841 */
1842static int cik_cp_gfx_start(struct radeon_device *rdev)
1843{
1844 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1845 int r, i;
1846
1847 /* init the CP */
1848 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1849 WREG32(CP_ENDIAN_SWAP, 0);
1850 WREG32(CP_DEVICE_ID, 1);
1851
1852 cik_cp_gfx_enable(rdev, true);
1853
1854 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1855 if (r) {
1856 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1857 return r;
1858 }
1859
1860 /* init the CE partitions. CE only used for gfx on CIK */
1861 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1862 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1863 radeon_ring_write(ring, 0xc000);
1864 radeon_ring_write(ring, 0xc000);
1865
1866 /* setup clear context state */
1867 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1868 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1869
1870 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1871 radeon_ring_write(ring, 0x80000000);
1872 radeon_ring_write(ring, 0x80000000);
1873
1874 for (i = 0; i < cik_default_size; i++)
1875 radeon_ring_write(ring, cik_default_state[i]);
1876
1877 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1878 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1879
1880 /* set clear context state */
1881 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1882 radeon_ring_write(ring, 0);
1883
1884 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1885 radeon_ring_write(ring, 0x00000316);
1886 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1887 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1888
1889 radeon_ring_unlock_commit(rdev, ring);
1890
1891 return 0;
1892}
1893
1894/**
1895 * cik_cp_gfx_fini - stop the gfx ring
1896 *
1897 * @rdev: radeon_device pointer
1898 *
1899 * Stop the gfx ring and tear down the driver ring
1900 * info.
1901 */
1902static void cik_cp_gfx_fini(struct radeon_device *rdev)
1903{
1904 cik_cp_gfx_enable(rdev, false);
1905 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1906}
1907
1908/**
1909 * cik_cp_gfx_resume - setup the gfx ring buffer registers
1910 *
1911 * @rdev: radeon_device pointer
1912 *
1913 * Program the location and size of the gfx ring buffer
1914 * and test it to make sure it's working.
1915 * Returns 0 for success, error for failure.
1916 */
1917static int cik_cp_gfx_resume(struct radeon_device *rdev)
1918{
1919 struct radeon_ring *ring;
1920 u32 tmp;
1921 u32 rb_bufsz;
1922 u64 rb_addr;
1923 int r;
1924
1925 WREG32(CP_SEM_WAIT_TIMER, 0x0);
1926 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1927
1928 /* Set the write pointer delay */
1929 WREG32(CP_RB_WPTR_DELAY, 0);
1930
1931 /* set the RB to use vmid 0 */
1932 WREG32(CP_RB_VMID, 0);
1933
1934 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1935
1936 /* ring 0 - compute and gfx */
1937 /* Set ring buffer size */
1938 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1939 rb_bufsz = drm_order(ring->ring_size / 8);
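	/* e.g. for a 1 MB ring this is drm_order(1048576 / 8) = 17 */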
1940 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1941#ifdef __BIG_ENDIAN
1942 tmp |= BUF_SWAP_32BIT;
1943#endif
1944 WREG32(CP_RB0_CNTL, tmp);
1945
1946 /* Initialize the ring buffer's read and write pointers */
1947 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
1948 ring->wptr = 0;
1949 WREG32(CP_RB0_WPTR, ring->wptr);
1950
1951	/* set the wb address whether it's enabled or not */
1952 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
1953 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
1954
1955 /* scratch register shadowing is no longer supported */
1956 WREG32(SCRATCH_UMSK, 0);
1957
1958 if (!rdev->wb.enabled)
1959 tmp |= RB_NO_UPDATE;
1960
1961 mdelay(1);
1962 WREG32(CP_RB0_CNTL, tmp);
1963
1964 rb_addr = ring->gpu_addr >> 8;
1965 WREG32(CP_RB0_BASE, rb_addr);
1966 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
1967
1968 ring->rptr = RREG32(CP_RB0_RPTR);
1969
1970 /* start the ring */
1971 cik_cp_gfx_start(rdev);
1972 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
1973 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1974 if (r) {
1975 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1976 return r;
1977 }
1978 return 0;
1979}
1980
1981/**
1982 * cik_cp_compute_enable - enable/disable the compute CP MEs
1983 *
1984 * @rdev: radeon_device pointer
1985 * @enable: enable or disable the MEs
1986 *
1987 * Halts or unhalts the compute MEs.
1988 */
1989static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
1990{
1991 if (enable)
1992 WREG32(CP_MEC_CNTL, 0);
1993 else
1994 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
1995 udelay(50);
1996}
1997
1998/**
1999 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2000 *
2001 * @rdev: radeon_device pointer
2002 *
2003 * Loads the compute MEC1&2 ucode.
2004 * Returns 0 for success, -EINVAL if the ucode is not available.
2005 */
2006static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2007{
2008 const __be32 *fw_data;
2009 int i;
2010
2011 if (!rdev->mec_fw)
2012 return -EINVAL;
2013
2014 cik_cp_compute_enable(rdev, false);
2015
2016 /* MEC1 */
2017 fw_data = (const __be32 *)rdev->mec_fw->data;
2018 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2019 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2020 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2021 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2022
2023 if (rdev->family == CHIP_KAVERI) {
2024 /* MEC2 */
2025 fw_data = (const __be32 *)rdev->mec_fw->data;
2026 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2027 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2028 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2029 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2030 }
2031
2032 return 0;
2033}
2034
2035/**
2036 * cik_cp_compute_start - start the compute queues
2037 *
2038 * @rdev: radeon_device pointer
2039 *
2040 * Enable the compute queues.
2041 * Returns 0 for success, error for failure.
2042 */
2043static int cik_cp_compute_start(struct radeon_device *rdev)
2044{
2045	/* todo */
2046 return 0;
2047}
2048
2049/**
2050 * cik_cp_compute_fini - stop the compute queues
2051 *
2052 * @rdev: radeon_device pointer
2053 *
2054 * Stop the compute queues and tear down the driver queue
2055 * info.
2056 */
2057static void cik_cp_compute_fini(struct radeon_device *rdev)
2058{
2059 cik_cp_compute_enable(rdev, false);
2060	/* todo */
2061}
2062
2063/**
2064 * cik_cp_compute_resume - setup the compute queue registers
2065 *
2066 * @rdev: radeon_device pointer
2067 *
2068 * Program the compute queues and test them to make sure they
2069 * are working.
2070 * Returns 0 for success, error for failure.
2071 */
2072static int cik_cp_compute_resume(struct radeon_device *rdev)
2073{
2074 int r;
2075
2076	/* todo */
2077 r = cik_cp_compute_start(rdev);
2078 if (r)
2079 return r;
2080 return 0;
2081}
2082
2083/* XXX temporary wrappers to handle both compute and gfx */
2084/* XXX */
2085static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2086{
2087 cik_cp_gfx_enable(rdev, enable);
2088 cik_cp_compute_enable(rdev, enable);
2089}
2090
2091/* XXX */
2092static int cik_cp_load_microcode(struct radeon_device *rdev)
2093{
2094 int r;
2095
2096 r = cik_cp_gfx_load_microcode(rdev);
2097 if (r)
2098 return r;
2099 r = cik_cp_compute_load_microcode(rdev);
2100 if (r)
2101 return r;
2102
2103 return 0;
2104}
2105
2106/* XXX */
2107static void cik_cp_fini(struct radeon_device *rdev)
2108{
2109 cik_cp_gfx_fini(rdev);
2110 cik_cp_compute_fini(rdev);
2111}
2112
2113/* XXX */
2114static int cik_cp_resume(struct radeon_device *rdev)
2115{
2116 int r;
2117
2118 /* Reset all cp blocks */
2119 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2120 RREG32(GRBM_SOFT_RESET);
2121 mdelay(15);
2122 WREG32(GRBM_SOFT_RESET, 0);
2123 RREG32(GRBM_SOFT_RESET);
2124
2125 r = cik_cp_load_microcode(rdev);
2126 if (r)
2127 return r;
2128
2129 r = cik_cp_gfx_resume(rdev);
2130 if (r)
2131 return r;
2132 r = cik_cp_compute_resume(rdev);
2133 if (r)
2134 return r;
2135
2136 return 0;
2137}
2138
2139/**
2140 * cik_gpu_is_lockup - check if the 3D engine is locked up
2141 *
2142 * @rdev: radeon_device pointer
2143 * @ring: radeon_ring structure holding ring information
2144 *
2145 * Check if the 3D engine is locked up (CIK).
2146 * Returns true if the engine is locked, false if not.
2147 */
2148bool cik_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2149{
2150 u32 srbm_status, srbm_status2;
2151 u32 grbm_status, grbm_status2;
2152 u32 grbm_status_se0, grbm_status_se1, grbm_status_se2, grbm_status_se3;
2153
2154 srbm_status = RREG32(SRBM_STATUS);
2155 srbm_status2 = RREG32(SRBM_STATUS2);
2156 grbm_status = RREG32(GRBM_STATUS);
2157 grbm_status2 = RREG32(GRBM_STATUS2);
2158 grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2159 grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2160 grbm_status_se2 = RREG32(GRBM_STATUS_SE2);
2161 grbm_status_se3 = RREG32(GRBM_STATUS_SE3);
2162 if (!(grbm_status & GUI_ACTIVE)) {
2163 radeon_ring_lockup_update(ring);
2164 return false;
2165 }
2166 /* force CP activities */
2167 radeon_ring_force_activity(rdev, ring);
2168 return radeon_ring_test_lockup(rdev, ring);
2169}
2170
2171/**
2172 * cik_gfx_gpu_soft_reset - soft reset the 3D engine and CPG
2173 *
2174 * @rdev: radeon_device pointer
2175 *
2176 * Soft reset the GFX engine and CPG blocks (CIK).
2177 * XXX: deal with resetting RLC and CPF
2178 * Returns 0 for success.
2179 */
2180static int cik_gfx_gpu_soft_reset(struct radeon_device *rdev)
2181{
2182 struct evergreen_mc_save save;
2183 u32 grbm_reset = 0;
2184
2185 if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2186 return 0;
2187
2188	dev_info(rdev->dev, "GPU GFX softreset\n");
2189 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2190 RREG32(GRBM_STATUS));
2191 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2192 RREG32(GRBM_STATUS2));
2193 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2194 RREG32(GRBM_STATUS_SE0));
2195 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2196 RREG32(GRBM_STATUS_SE1));
2197 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2198 RREG32(GRBM_STATUS_SE2));
2199 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2200 RREG32(GRBM_STATUS_SE3));
2201 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2202 RREG32(SRBM_STATUS));
2203 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2204 RREG32(SRBM_STATUS2));
2205 evergreen_mc_stop(rdev, &save);
2206 if (radeon_mc_wait_for_idle(rdev)) {
2207		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2208 }
2209 /* Disable CP parsing/prefetching */
2210 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2211
2212 /* reset all the gfx block and all CPG blocks */
2213 grbm_reset = SOFT_RESET_CPG | SOFT_RESET_GFX;
2214
2215 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2216 WREG32(GRBM_SOFT_RESET, grbm_reset);
2217 (void)RREG32(GRBM_SOFT_RESET);
2218 udelay(50);
2219 WREG32(GRBM_SOFT_RESET, 0);
2220 (void)RREG32(GRBM_SOFT_RESET);
2221 /* Wait a little for things to settle down */
2222 udelay(50);
2223 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2224 RREG32(GRBM_STATUS));
2225 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2226 RREG32(GRBM_STATUS2));
2227 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2228 RREG32(GRBM_STATUS_SE0));
2229 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2230 RREG32(GRBM_STATUS_SE1));
2231 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2232 RREG32(GRBM_STATUS_SE2));
2233 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2234 RREG32(GRBM_STATUS_SE3));
2235 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2236 RREG32(SRBM_STATUS));
2237 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2238 RREG32(SRBM_STATUS2));
2239 evergreen_mc_resume(rdev, &save);
2240 return 0;
2241}
2242
2243/**
2244 * cik_compute_gpu_soft_reset - soft reset CPC
2245 *
2246 * @rdev: radeon_device pointer
2247 *
2248 * Soft reset the CPC blocks (CIK).
2249 * XXX: deal with resetting RLC and CPF
2250 * Returns 0 for success.
2251 */
2252static int cik_compute_gpu_soft_reset(struct radeon_device *rdev)
2253{
2254 struct evergreen_mc_save save;
2255 u32 grbm_reset = 0;
2256
2257	dev_info(rdev->dev, "GPU compute softreset\n");
2258 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2259 RREG32(GRBM_STATUS));
2260 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2261 RREG32(GRBM_STATUS2));
2262 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2263 RREG32(GRBM_STATUS_SE0));
2264 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2265 RREG32(GRBM_STATUS_SE1));
2266 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2267 RREG32(GRBM_STATUS_SE2));
2268 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2269 RREG32(GRBM_STATUS_SE3));
2270 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2271 RREG32(SRBM_STATUS));
2272 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2273 RREG32(SRBM_STATUS2));
2274 evergreen_mc_stop(rdev, &save);
2275 if (radeon_mc_wait_for_idle(rdev)) {
2276		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2277 }
2278 /* Disable CP parsing/prefetching */
2279 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2280
2281 /* reset all the CPC blocks */
2282 grbm_reset = SOFT_RESET_CPG;
2283	grbm_reset = SOFT_RESET_CPC;
2284 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2285 WREG32(GRBM_SOFT_RESET, grbm_reset);
2286 (void)RREG32(GRBM_SOFT_RESET);
2287 udelay(50);
2288 WREG32(GRBM_SOFT_RESET, 0);
2289 (void)RREG32(GRBM_SOFT_RESET);
2290 /* Wait a little for things to settle down */
2291 udelay(50);
2292 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2293 RREG32(GRBM_STATUS));
2294 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2295 RREG32(GRBM_STATUS2));
2296 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2297 RREG32(GRBM_STATUS_SE0));
2298 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2299 RREG32(GRBM_STATUS_SE1));
2300 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2301 RREG32(GRBM_STATUS_SE2));
2302 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2303 RREG32(GRBM_STATUS_SE3));
2304 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2305 RREG32(SRBM_STATUS));
2306 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2307 RREG32(SRBM_STATUS2));
2308 evergreen_mc_resume(rdev, &save);
2309 return 0;
2310}
2311
2312/**
2313 * cik_asic_reset - soft reset compute and gfx
2314 *
2315 * @rdev: radeon_device pointer
2316 *
2317 * Soft reset the compute and gfx blocks (CIK).
2318 * XXX: make this more fine grained and only reset
2319 * what is necessary.
2320 * Returns 0 for success.
2321 */
2322int cik_asic_reset(struct radeon_device *rdev)
2323{
2324 int r;
2325
2326 r = cik_compute_gpu_soft_reset(rdev);
2327 if (r)
2328 dev_info(rdev->dev, "Compute reset failed!\n");
2329
2330 return cik_gfx_gpu_soft_reset(rdev);
2331}
2332
2333/* MC */
2334/**
2335 * cik_mc_program - program the GPU memory controller
2336 *
2337 * @rdev: radeon_device pointer
2338 *
2339 * Set the location of vram, gart, and AGP in the GPU's
2340 * physical address space (CIK).
2341 */
2342static void cik_mc_program(struct radeon_device *rdev)
2343{
2344 struct evergreen_mc_save save;
2345 u32 tmp;
2346 int i, j;
2347
2348 /* Initialize HDP */
2349 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2350 WREG32((0x2c14 + j), 0x00000000);
2351 WREG32((0x2c18 + j), 0x00000000);
2352 WREG32((0x2c1c + j), 0x00000000);
2353 WREG32((0x2c20 + j), 0x00000000);
2354 WREG32((0x2c24 + j), 0x00000000);
2355 }
2356 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2357
2358 evergreen_mc_stop(rdev, &save);
2359 if (radeon_mc_wait_for_idle(rdev)) {
2360		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2361 }
2362 /* Lockout access through VGA aperture*/
2363 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2364 /* Update configuration */
2365 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2366 rdev->mc.vram_start >> 12);
2367 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2368 rdev->mc.vram_end >> 12);
2369 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2370 rdev->vram_scratch.gpu_addr >> 12);
2371 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2372 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2373 WREG32(MC_VM_FB_LOCATION, tmp);
2374 /* XXX double check these! */
2375 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2376 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2377 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
2378 WREG32(MC_VM_AGP_BASE, 0);
2379 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2380 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2381 if (radeon_mc_wait_for_idle(rdev)) {
2382		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2383 }
2384 evergreen_mc_resume(rdev, &save);
2385 /* we need to own VRAM, so turn off the VGA renderer here
2386 * to stop it overwriting our objects */
2387 rv515_vga_render_disable(rdev);
2388}
2389
2390/**
2391 * cik_mc_init - initialize the memory controller driver params
2392 *
2393 * @rdev: radeon_device pointer
2394 *
2395 * Look up the amount of vram, vram width, and decide how to place
2396 * vram and gart within the GPU's physical address space (CIK).
2397 * Returns 0 for success.
2398 */
2399static int cik_mc_init(struct radeon_device *rdev)
2400{
2401 u32 tmp;
2402 int chansize, numchan;
2403
2404	/* Get VRAM information */
2405 rdev->mc.vram_is_ddr = true;
2406 tmp = RREG32(MC_ARB_RAMCFG);
2407 if (tmp & CHANSIZE_MASK) {
2408 chansize = 64;
2409 } else {
2410 chansize = 32;
2411 }
2412 tmp = RREG32(MC_SHARED_CHMAP);
2413 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2414 case 0:
2415 default:
2416 numchan = 1;
2417 break;
2418 case 1:
2419 numchan = 2;
2420 break;
2421 case 2:
2422 numchan = 4;
2423 break;
2424 case 3:
2425 numchan = 8;
2426 break;
2427 case 4:
2428 numchan = 3;
2429 break;
2430 case 5:
2431 numchan = 6;
2432 break;
2433 case 6:
2434 numchan = 10;
2435 break;
2436 case 7:
2437 numchan = 12;
2438 break;
2439 case 8:
2440 numchan = 16;
2441 break;
2442 }
2443 rdev->mc.vram_width = numchan * chansize;
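	/* e.g. 4 channels x 64 bits per channel -> a 256-bit memory interface */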
2444 /* Could aper size report 0 ? */
2445 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2446 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2447	/* size in MB on cik */
2448 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2449 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2450 rdev->mc.visible_vram_size = rdev->mc.aper_size;
2451 si_vram_gtt_location(rdev, &rdev->mc);
2452 radeon_update_bandwidth_info(rdev);
2453
2454 return 0;
2455}
2456
2457/*
2458 * GART
2459 * VMID 0 is the physical GPU addresses as used by the kernel.
2460 * VMIDs 1-15 are used for userspace clients and are handled
2461 * by the radeon vm/hsa code.
2462 */
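/*
 * Illustrative sketch only (not used by the driver): the per-VMID page
 * table base registers live in two banks of contiguous 32-bit registers,
 * so the register for a given VMID can be computed as below.  This
 * mirrors the addressing used in cik_pcie_gart_enable() and
 * cik_vm_flush(); the helper name is hypothetical.
 */
static inline u32 cik_vm_pt_base_reg(unsigned vmid)
{
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}
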
2463/**
2464 * cik_pcie_gart_tlb_flush - gart tlb flush callback
2465 *
2466 * @rdev: radeon_device pointer
2467 *
2468 * Flush the TLB for the VMID 0 page table (CIK).
2469 */
2470void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
2471{
2472 /* flush hdp cache */
2473 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
2474
2475 /* bits 0-15 are the VM contexts0-15 */
2476 WREG32(VM_INVALIDATE_REQUEST, 0x1);
2477}
2478
2479/**
2480 * cik_pcie_gart_enable - gart enable
2481 *
2482 * @rdev: radeon_device pointer
2483 *
2484 * This sets up the TLBs, programs the page tables for VMID0,
2485 * sets up the hw for VMIDs 1-15 which are allocated on
2486 * demand, and sets up the global locations for the LDS, GDS,
2487 * and GPUVM for FSA64 clients (CIK).
2488 * Returns 0 for success, errors for failure.
2489 */
2490static int cik_pcie_gart_enable(struct radeon_device *rdev)
2491{
2492 int r, i;
2493
2494 if (rdev->gart.robj == NULL) {
2495 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2496 return -EINVAL;
2497 }
2498 r = radeon_gart_table_vram_pin(rdev);
2499 if (r)
2500 return r;
2501 radeon_gart_restore(rdev);
2502 /* Setup TLB control */
2503 WREG32(MC_VM_MX_L1_TLB_CNTL,
2504 (0xA << 7) |
2505 ENABLE_L1_TLB |
2506 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2507 ENABLE_ADVANCED_DRIVER_MODEL |
2508 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2509 /* Setup L2 cache */
2510 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
2511 ENABLE_L2_FRAGMENT_PROCESSING |
2512 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2513 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2514 EFFECTIVE_L2_QUEUE_SIZE(7) |
2515 CONTEXT1_IDENTITY_ACCESS_MODE(1));
2516 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
2517 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2518 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
2519 /* setup context0 */
2520 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2521 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2522 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2523 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
2524 (u32)(rdev->dummy_page.addr >> 12));
2525 WREG32(VM_CONTEXT0_CNTL2, 0);
2526 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2527 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
2528
2529 WREG32(0x15D4, 0);
2530 WREG32(0x15D8, 0);
2531 WREG32(0x15DC, 0);
2532
2533 /* empty context1-15 */
2534 /* FIXME start with 4G, once using 2 level pt switch to full
2535 * vm size space
2536 */
2537 /* set vm size, must be a multiple of 4 */
2538 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
2539 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
2540 for (i = 1; i < 16; i++) {
2541 if (i < 8)
2542 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
2543 rdev->gart.table_addr >> 12);
2544 else
2545 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2546 rdev->gart.table_addr >> 12);
2547 }
2548
2549 /* enable context1-15 */
2550 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2551 (u32)(rdev->dummy_page.addr >> 12));
2552	WREG32(VM_CONTEXT1_CNTL2, 4);
2553	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
2554				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2555 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2556 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2557 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2558 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
2559 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
2560 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
2561 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
2562 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
2563 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
2564 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2565 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
2566
2567 /* TC cache setup ??? */
2568 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
2569 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
2570 WREG32(TC_CFG_L1_STORE_POLICY, 0);
2571
2572 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
2573 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
2574 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
2575 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
2576 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
2577
2578 WREG32(TC_CFG_L1_VOLATILE, 0);
2579 WREG32(TC_CFG_L2_VOLATILE, 0);
2580
2581 if (rdev->family == CHIP_KAVERI) {
2582 u32 tmp = RREG32(CHUB_CONTROL);
2583 tmp &= ~BYPASS_VM;
2584 WREG32(CHUB_CONTROL, tmp);
2585 }
2586
2587 /* XXX SH_MEM regs */
2588 /* where to put LDS, scratch, GPUVM in FSA64 space */
2589 for (i = 0; i < 16; i++) {
2590 WREG32(SRBM_GFX_CNTL, VMID(i));
2591 WREG32(SH_MEM_CONFIG, 0);
2592 WREG32(SH_MEM_APE1_BASE, 1);
2593 WREG32(SH_MEM_APE1_LIMIT, 0);
2594 WREG32(SH_MEM_BASES, 0);
2595 }
2596 WREG32(SRBM_GFX_CNTL, 0);
2597
2598 cik_pcie_gart_tlb_flush(rdev);
2599 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2600 (unsigned)(rdev->mc.gtt_size >> 20),
2601 (unsigned long long)rdev->gart.table_addr);
2602 rdev->gart.ready = true;
2603 return 0;
2604}
2605
2606/**
2607 * cik_pcie_gart_disable - gart disable
2608 *
2609 * @rdev: radeon_device pointer
2610 *
2611 * This disables all VM page tables (CIK).
2612 */
2613static void cik_pcie_gart_disable(struct radeon_device *rdev)
2614{
2615 /* Disable all tables */
2616 WREG32(VM_CONTEXT0_CNTL, 0);
2617 WREG32(VM_CONTEXT1_CNTL, 0);
2618 /* Setup TLB control */
2619 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2620 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2621 /* Setup L2 cache */
2622 WREG32(VM_L2_CNTL,
2623 ENABLE_L2_FRAGMENT_PROCESSING |
2624 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2625 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2626 EFFECTIVE_L2_QUEUE_SIZE(7) |
2627 CONTEXT1_IDENTITY_ACCESS_MODE(1));
2628 WREG32(VM_L2_CNTL2, 0);
2629 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2630 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
2631 radeon_gart_table_vram_unpin(rdev);
2632}
2633
2634/**
2635 * cik_pcie_gart_fini - vm fini callback
2636 *
2637 * @rdev: radeon_device pointer
2638 *
2639 * Tears down the driver GART/VM setup (CIK).
2640 */
2641static void cik_pcie_gart_fini(struct radeon_device *rdev)
2642{
2643 cik_pcie_gart_disable(rdev);
2644 radeon_gart_table_vram_free(rdev);
2645 radeon_gart_fini(rdev);
2646}
2647
2648/* vm parser */
2649/**
2650 * cik_ib_parse - vm ib_parse callback
2651 *
2652 * @rdev: radeon_device pointer
2653 * @ib: indirect buffer pointer
2654 *
2655 * CIK uses hw IB checking so this is a nop (CIK).
2656 */
2657int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2658{
2659 return 0;
2660}
2661
2662/*
2663 * vm
2664 * VMID 0 is the physical GPU addresses as used by the kernel.
2665 * VMIDs 1-15 are used for userspace clients and are handled
2666 * by the radeon vm/hsa code.
2667 */
2668/**
2669 * cik_vm_init - cik vm init callback
2670 *
2671 * @rdev: radeon_device pointer
2672 *
2673 * Inits cik specific vm parameters (number of VMs, base of vram for
2674 * VMIDs 1-15) (CIK).
2675 * Returns 0 for success.
2676 */
2677int cik_vm_init(struct radeon_device *rdev)
2678{
2679 /* number of VMs */
2680 rdev->vm_manager.nvm = 16;
2681 /* base offset of vram pages */
2682 if (rdev->flags & RADEON_IS_IGP) {
2683 u64 tmp = RREG32(MC_VM_FB_OFFSET);
2684 tmp <<= 22;
2685 rdev->vm_manager.vram_base_offset = tmp;
2686 } else
2687 rdev->vm_manager.vram_base_offset = 0;
2688
2689 return 0;
2690}
2691
2692/**
2693 * cik_vm_fini - cik vm fini callback
2694 *
2695 * @rdev: radeon_device pointer
2696 *
2697 * Tear down any asic specific VM setup (CIK).
2698 */
2699void cik_vm_fini(struct radeon_device *rdev)
2700{
2701}
2702
2703/**
2704 * cik_vm_flush - cik vm flush using the CP
2705 *
2706 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
2707 *
2708 * Update the page table base and flush the VM TLB
2709 * using the CP (CIK).
2710 */
2711void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2712{
2713 struct radeon_ring *ring = &rdev->ring[ridx];
2714
2715 if (vm == NULL)
2716 return;
2717
2718 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2719 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2720 WRITE_DATA_DST_SEL(0)));
2721 if (vm->id < 8) {
2722 radeon_ring_write(ring,
2723 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
2724 } else {
2725 radeon_ring_write(ring,
2726 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
2727 }
2728 radeon_ring_write(ring, 0);
2729 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2730
2731 /* update SH_MEM_* regs */
2732 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2733 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2734 WRITE_DATA_DST_SEL(0)));
2735 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
2736 radeon_ring_write(ring, 0);
2737 radeon_ring_write(ring, VMID(vm->id));
2738
2739 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
2740 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2741 WRITE_DATA_DST_SEL(0)));
2742 radeon_ring_write(ring, SH_MEM_BASES >> 2);
2743 radeon_ring_write(ring, 0);
2744
2745 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
2746 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
2747 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
2748 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
2749
2750 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2751 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2752 WRITE_DATA_DST_SEL(0)));
2753 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
2754 radeon_ring_write(ring, 0);
2755 radeon_ring_write(ring, VMID(0));
2756
2757 /* HDP flush */
2758 /* We should be using the WAIT_REG_MEM packet here like in
2759 * cik_fence_ring_emit(), but it causes the CP to hang in this
2760 * context...
2761 */
2762 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2763 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2764 WRITE_DATA_DST_SEL(0)));
2765 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2766 radeon_ring_write(ring, 0);
2767 radeon_ring_write(ring, 0);
2768
2769 /* bits 0-15 are the VM contexts0-15 */
2770 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2771 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2772 WRITE_DATA_DST_SEL(0)));
2773 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
2774 radeon_ring_write(ring, 0);
2775 radeon_ring_write(ring, 1 << vm->id);
2776
2777 /* sync PFP to ME, otherwise we might get invalid PFP reads */
2778 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2779 radeon_ring_write(ring, 0x0);
2780}
2781
2782/*
2783 * RLC
2784 * The RLC is a multi-purpose microengine that handles a
2785 * variety of functions, the most important of which is
2786 * the interrupt controller.
2787 */
2788/**
2789 * cik_rlc_stop - stop the RLC ME
2790 *
2791 * @rdev: radeon_device pointer
2792 *
2793 * Halt the RLC ME (MicroEngine) (CIK).
2794 */
2795static void cik_rlc_stop(struct radeon_device *rdev)
2796{
2797 int i, j, k;
2798 u32 mask, tmp;
2799
2800 tmp = RREG32(CP_INT_CNTL_RING0);
2801 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2802 WREG32(CP_INT_CNTL_RING0, tmp);
2803
2804 RREG32(CB_CGTT_SCLK_CTRL);
2805 RREG32(CB_CGTT_SCLK_CTRL);
2806 RREG32(CB_CGTT_SCLK_CTRL);
2807 RREG32(CB_CGTT_SCLK_CTRL);
2808
2809 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
2810 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
2811
2812 WREG32(RLC_CNTL, 0);
2813
2814 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
2815 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
2816 cik_select_se_sh(rdev, i, j);
2817 for (k = 0; k < rdev->usec_timeout; k++) {
2818 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
2819 break;
2820 udelay(1);
2821 }
2822 }
2823 }
2824 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2825
2826 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
2827 for (k = 0; k < rdev->usec_timeout; k++) {
2828 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2829 break;
2830 udelay(1);
2831 }
2832}
2833
2834/**
2835 * cik_rlc_start - start the RLC ME
2836 *
2837 * @rdev: radeon_device pointer
2838 *
2839 * Unhalt the RLC ME (MicroEngine) (CIK).
2840 */
2841static void cik_rlc_start(struct radeon_device *rdev)
2842{
2843 u32 tmp;
2844
2845 WREG32(RLC_CNTL, RLC_ENABLE);
2846
2847 tmp = RREG32(CP_INT_CNTL_RING0);
2848 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2849 WREG32(CP_INT_CNTL_RING0, tmp);
2850
2851 udelay(50);
2852}
2853
2854/**
2855 * cik_rlc_resume - setup the RLC hw
2856 *
2857 * @rdev: radeon_device pointer
2858 *
2859 * Initialize the RLC registers, load the ucode,
2860 * and start the RLC (CIK).
2861 * Returns 0 for success, -EINVAL if the ucode is not available.
2862 */
2863static int cik_rlc_resume(struct radeon_device *rdev)
2864{
2865 u32 i, size;
2866 u32 clear_state_info[3];
2867 const __be32 *fw_data;
2868
2869 if (!rdev->rlc_fw)
2870 return -EINVAL;
2871
2872 switch (rdev->family) {
2873 case CHIP_BONAIRE:
2874 default:
2875 size = BONAIRE_RLC_UCODE_SIZE;
2876 break;
2877 case CHIP_KAVERI:
2878 size = KV_RLC_UCODE_SIZE;
2879 break;
2880 case CHIP_KABINI:
2881 size = KB_RLC_UCODE_SIZE;
2882 break;
2883 }
2884
2885 cik_rlc_stop(rdev);
2886
2887 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
2888 RREG32(GRBM_SOFT_RESET);
2889 udelay(50);
2890 WREG32(GRBM_SOFT_RESET, 0);
2891 RREG32(GRBM_SOFT_RESET);
2892 udelay(50);
2893
2894 WREG32(RLC_LB_CNTR_INIT, 0);
2895 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
2896
2897 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2898 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
2899 WREG32(RLC_LB_PARAMS, 0x00600408);
2900 WREG32(RLC_LB_CNTL, 0x80000004);
2901
2902 WREG32(RLC_MC_CNTL, 0);
2903 WREG32(RLC_UCODE_CNTL, 0);
2904
2905 fw_data = (const __be32 *)rdev->rlc_fw->data;
2906 WREG32(RLC_GPM_UCODE_ADDR, 0);
2907 for (i = 0; i < size; i++)
2908 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
2909 WREG32(RLC_GPM_UCODE_ADDR, 0);
2910
2911 /* XXX */
2912	clear_state_info[0] = 0; /* upper_32_bits(rdev->rlc.save_restore_gpu_addr) */
2913	clear_state_info[1] = 0; /* rdev->rlc.save_restore_gpu_addr */
2914	clear_state_info[2] = 0; /* cik_default_size */
2915 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
2916 for (i = 0; i < 3; i++)
2917 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
2918 WREG32(RLC_DRIVER_DMA_STATUS, 0);
2919
2920 cik_rlc_start(rdev);
2921
2922 return 0;
2923}
2924
2925/*
2926 * Interrupts
2927 * Starting with r6xx, interrupts are handled via a ring buffer.
2928 * Ring buffers are areas of GPU accessible memory that the GPU
2929 * writes interrupt vectors into and the host reads vectors out of.
2930 * There is a rptr (read pointer) that determines where the
2931 * host is currently reading, and a wptr (write pointer)
2932 * which determines where the GPU has written. When the
2933 * pointers are equal, the ring is idle. When the GPU
2934 * writes vectors to the ring buffer, it increments the
2935 * wptr. When there is an interrupt, the host then starts
2936 * fetching commands and processing them until the pointers are
2937 * equal again at which point it updates the rptr.
2938 */
2939
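/*
 * Illustrative sketch only (the real consumer is cik_irq_process() below):
 * the host drains the IH ring until rptr catches up with wptr, then
 * publishes the new rptr so the GPU can reuse the space.
 *
 *	while (rptr != wptr) {
 *		process_vector(&ring[rptr / 4]);	// one 128-bit entry
 *		rptr = (rptr + 16) & ptr_mask;		// 16 bytes per entry
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */
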
2940/**
2941 * cik_enable_interrupts - Enable the interrupt ring buffer
2942 *
2943 * @rdev: radeon_device pointer
2944 *
2945 * Enable the interrupt ring buffer (CIK).
2946 */
2947static void cik_enable_interrupts(struct radeon_device *rdev)
2948{
2949 u32 ih_cntl = RREG32(IH_CNTL);
2950 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
2951
2952 ih_cntl |= ENABLE_INTR;
2953 ih_rb_cntl |= IH_RB_ENABLE;
2954 WREG32(IH_CNTL, ih_cntl);
2955 WREG32(IH_RB_CNTL, ih_rb_cntl);
2956 rdev->ih.enabled = true;
2957}
2958
2959/**
2960 * cik_disable_interrupts - Disable the interrupt ring buffer
2961 *
2962 * @rdev: radeon_device pointer
2963 *
2964 * Disable the interrupt ring buffer (CIK).
2965 */
2966static void cik_disable_interrupts(struct radeon_device *rdev)
2967{
2968 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
2969 u32 ih_cntl = RREG32(IH_CNTL);
2970
2971 ih_rb_cntl &= ~IH_RB_ENABLE;
2972 ih_cntl &= ~ENABLE_INTR;
2973 WREG32(IH_RB_CNTL, ih_rb_cntl);
2974 WREG32(IH_CNTL, ih_cntl);
2975 /* set rptr, wptr to 0 */
2976 WREG32(IH_RB_RPTR, 0);
2977 WREG32(IH_RB_WPTR, 0);
2978 rdev->ih.enabled = false;
2979 rdev->ih.rptr = 0;
2980}
2981
2982/**
2983 * cik_disable_interrupt_state - Disable all interrupt sources
2984 *
2985 * @rdev: radeon_device pointer
2986 *
2987 * Clear all interrupt enable bits used by the driver (CIK).
2988 */
2989static void cik_disable_interrupt_state(struct radeon_device *rdev)
2990{
2991 u32 tmp;
2992
2993 /* gfx ring */
2994 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2995 /* compute queues */
2996 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
2997 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
2998 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
2999 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
3000 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
3001 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
3002 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
3003 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
3004 /* grbm */
3005 WREG32(GRBM_INT_CNTL, 0);
3006 /* vline/vblank, etc. */
3007 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3008 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3009 if (rdev->num_crtc >= 4) {
3010 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3011 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3012 }
3013 if (rdev->num_crtc >= 6) {
3014 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3015 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3016 }
3017
3018 /* dac hotplug */
3019 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
3020
3021 /* digital hotplug */
3022 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3023 WREG32(DC_HPD1_INT_CONTROL, tmp);
3024 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3025 WREG32(DC_HPD2_INT_CONTROL, tmp);
3026 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3027 WREG32(DC_HPD3_INT_CONTROL, tmp);
3028 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3029 WREG32(DC_HPD4_INT_CONTROL, tmp);
3030 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3031 WREG32(DC_HPD5_INT_CONTROL, tmp);
3032 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3033 WREG32(DC_HPD6_INT_CONTROL, tmp);
3034
3035}
3036
3037/**
3038 * cik_irq_init - init and enable the interrupt ring
3039 *
3040 * @rdev: radeon_device pointer
3041 *
3042 * Allocate a ring buffer for the interrupt controller,
3043 * enable the RLC, disable interrupts, set up the IH
3044 * ring buffer, and enable it (CIK).
3045 * Called at device load and resume.
3046 * Returns 0 for success, errors for failure.
3047 */
3048static int cik_irq_init(struct radeon_device *rdev)
3049{
3050 int ret = 0;
3051 int rb_bufsz;
3052 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3053
3054 /* allocate ring */
3055 ret = r600_ih_ring_alloc(rdev);
3056 if (ret)
3057 return ret;
3058
3059 /* disable irqs */
3060 cik_disable_interrupts(rdev);
3061
3062 /* init rlc */
3063 ret = cik_rlc_resume(rdev);
3064 if (ret) {
3065 r600_ih_ring_fini(rdev);
3066 return ret;
3067 }
3068
3069 /* setup interrupt control */
3070 /* XXX this should actually be a bus address, not an MC address. same on older asics */
3071 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3072 interrupt_cntl = RREG32(INTERRUPT_CNTL);
3073 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3074 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3075 */
3076 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3077 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3078 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3079 WREG32(INTERRUPT_CNTL, interrupt_cntl);
3080
3081 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3082 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3083
3084 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3085 IH_WPTR_OVERFLOW_CLEAR |
3086 (rb_bufsz << 1));
3087
3088 if (rdev->wb.enabled)
3089 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3090
3091 /* set the writeback address whether it's enabled or not */
3092 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3093 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
3094
3095 WREG32(IH_RB_CNTL, ih_rb_cntl);
3096
3097 /* set rptr, wptr to 0 */
3098 WREG32(IH_RB_RPTR, 0);
3099 WREG32(IH_RB_WPTR, 0);
3100
3101 /* Default settings for IH_CNTL (disabled at first) */
3102 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3103 /* RPTR_REARM only works if msi's are enabled */
3104 if (rdev->msi_enabled)
3105 ih_cntl |= RPTR_REARM;
3106 WREG32(IH_CNTL, ih_cntl);
3107
3108 /* force the active interrupt state to all disabled */
3109 cik_disable_interrupt_state(rdev);
3110
3111 pci_set_master(rdev->pdev);
3112
3113 /* enable irqs */
3114 cik_enable_interrupts(rdev);
3115
3116 return ret;
3117}
3118
3119/**
3120 * cik_irq_set - enable/disable interrupt sources
3121 *
3122 * @rdev: radeon_device pointer
3123 *
3124 * Enable interrupt sources on the GPU (vblanks, hpd,
3125 * etc.) (CIK).
3126 * Returns 0 for success, errors for failure.
3127 */
3128int cik_irq_set(struct radeon_device *rdev)
3129{
3130 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
3131 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
3132 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3133 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3134 u32 grbm_int_cntl = 0;
3135
3136 if (!rdev->irq.installed) {
3137 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
3138 return -EINVAL;
3139 }
3140 /* don't enable anything if the ih is disabled */
3141 if (!rdev->ih.enabled) {
3142 cik_disable_interrupts(rdev);
3143 /* force the active interrupt state to all disabled */
3144 cik_disable_interrupt_state(rdev);
3145 return 0;
3146 }
3147
3148 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
3149 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
3150 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
3151 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
3152 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3153 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3154
3155 /* enable CP interrupts on all rings */
3156 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3157 DRM_DEBUG("cik_irq_set: sw int gfx\n");
3158 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3159 }
3160 /* TODO: compute queues! */
3161 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
3162
3163 if (rdev->irq.crtc_vblank_int[0] ||
3164 atomic_read(&rdev->irq.pflip[0])) {
3165 DRM_DEBUG("cik_irq_set: vblank 0\n");
3166 crtc1 |= VBLANK_INTERRUPT_MASK;
3167 }
3168 if (rdev->irq.crtc_vblank_int[1] ||
3169 atomic_read(&rdev->irq.pflip[1])) {
3170 DRM_DEBUG("cik_irq_set: vblank 1\n");
3171 crtc2 |= VBLANK_INTERRUPT_MASK;
3172 }
3173 if (rdev->irq.crtc_vblank_int[2] ||
3174 atomic_read(&rdev->irq.pflip[2])) {
3175 DRM_DEBUG("cik_irq_set: vblank 2\n");
3176 crtc3 |= VBLANK_INTERRUPT_MASK;
3177 }
3178 if (rdev->irq.crtc_vblank_int[3] ||
3179 atomic_read(&rdev->irq.pflip[3])) {
3180 DRM_DEBUG("cik_irq_set: vblank 3\n");
3181 crtc4 |= VBLANK_INTERRUPT_MASK;
3182 }
3183 if (rdev->irq.crtc_vblank_int[4] ||
3184 atomic_read(&rdev->irq.pflip[4])) {
3185 DRM_DEBUG("cik_irq_set: vblank 4\n");
3186 crtc5 |= VBLANK_INTERRUPT_MASK;
3187 }
3188 if (rdev->irq.crtc_vblank_int[5] ||
3189 atomic_read(&rdev->irq.pflip[5])) {
3190 DRM_DEBUG("cik_irq_set: vblank 5\n");
3191 crtc6 |= VBLANK_INTERRUPT_MASK;
3192 }
3193 if (rdev->irq.hpd[0]) {
3194 DRM_DEBUG("cik_irq_set: hpd 1\n");
3195 hpd1 |= DC_HPDx_INT_EN;
3196 }
3197 if (rdev->irq.hpd[1]) {
3198 DRM_DEBUG("cik_irq_set: hpd 2\n");
3199 hpd2 |= DC_HPDx_INT_EN;
3200 }
3201 if (rdev->irq.hpd[2]) {
3202 DRM_DEBUG("cik_irq_set: hpd 3\n");
3203 hpd3 |= DC_HPDx_INT_EN;
3204 }
3205 if (rdev->irq.hpd[3]) {
3206 DRM_DEBUG("cik_irq_set: hpd 4\n");
3207 hpd4 |= DC_HPDx_INT_EN;
3208 }
3209 if (rdev->irq.hpd[4]) {
3210 DRM_DEBUG("cik_irq_set: hpd 5\n");
3211 hpd5 |= DC_HPDx_INT_EN;
3212 }
3213 if (rdev->irq.hpd[5]) {
3214 DRM_DEBUG("cik_irq_set: hpd 6\n");
3215 hpd6 |= DC_HPDx_INT_EN;
3216 }
3217
3218 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3219
3220 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3221
3222 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
3223 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
3224 if (rdev->num_crtc >= 4) {
3225 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
3226 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
3227 }
3228 if (rdev->num_crtc >= 6) {
3229 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
3230 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
3231 }
3232
3233 WREG32(DC_HPD1_INT_CONTROL, hpd1);
3234 WREG32(DC_HPD2_INT_CONTROL, hpd2);
3235 WREG32(DC_HPD3_INT_CONTROL, hpd3);
3236 WREG32(DC_HPD4_INT_CONTROL, hpd4);
3237 WREG32(DC_HPD5_INT_CONTROL, hpd5);
3238 WREG32(DC_HPD6_INT_CONTROL, hpd6);
3239
3240 return 0;
3241}
3242
3243/**
3244 * cik_irq_ack - ack interrupt sources
3245 *
3246 * @rdev: radeon_device pointer
3247 *
3248 * Ack interrupt sources on the GPU (vblanks, hpd,
3249 * etc.) (CIK). Certain interrupt sources are sw
3250 * generated and do not require an explicit ack.
3251 */
3252static inline void cik_irq_ack(struct radeon_device *rdev)
3253{
3254 u32 tmp;
3255
3256 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3257 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3258 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3259 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3260 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3261 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3262 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
3263
3264 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
3265 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
3266 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
3267 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
3268 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3269 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
3270 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3271 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
3272
3273 if (rdev->num_crtc >= 4) {
3274 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3275 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
3276 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3277 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
3278 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3279 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
3280 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3281 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
3282 }
3283
3284 if (rdev->num_crtc >= 6) {
3285 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3286 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
3287 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3288 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
3289 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3290 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
3291 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3292 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
3293 }
3294
3295 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
3296 tmp = RREG32(DC_HPD1_INT_CONTROL);
3297 tmp |= DC_HPDx_INT_ACK;
3298 WREG32(DC_HPD1_INT_CONTROL, tmp);
3299 }
3300 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
3301 tmp = RREG32(DC_HPD2_INT_CONTROL);
3302 tmp |= DC_HPDx_INT_ACK;
3303 WREG32(DC_HPD2_INT_CONTROL, tmp);
3304 }
3305 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3306 tmp = RREG32(DC_HPD3_INT_CONTROL);
3307 tmp |= DC_HPDx_INT_ACK;
3308 WREG32(DC_HPD3_INT_CONTROL, tmp);
3309 }
3310 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3311 tmp = RREG32(DC_HPD4_INT_CONTROL);
3312 tmp |= DC_HPDx_INT_ACK;
3313 WREG32(DC_HPD4_INT_CONTROL, tmp);
3314 }
3315 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3316 tmp = RREG32(DC_HPD5_INT_CONTROL);
3317 tmp |= DC_HPDx_INT_ACK;
3318 WREG32(DC_HPD5_INT_CONTROL, tmp);
3319 }
3320 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3321		tmp = RREG32(DC_HPD6_INT_CONTROL);
3322 tmp |= DC_HPDx_INT_ACK;
3323 WREG32(DC_HPD6_INT_CONTROL, tmp);
3324 }
3325}
3326
3327/**
3328 * cik_irq_disable - disable interrupts
3329 *
3330 * @rdev: radeon_device pointer
3331 *
3332 * Disable interrupts on the hw (CIK).
3333 */
3334static void cik_irq_disable(struct radeon_device *rdev)
3335{
3336 cik_disable_interrupts(rdev);
3337 /* Wait and acknowledge irq */
3338 mdelay(1);
3339 cik_irq_ack(rdev);
3340 cik_disable_interrupt_state(rdev);
3341}
3342
3343/**
3344 * cik_irq_suspend - disable interrupts for suspend
3345 *
3346 * @rdev: radeon_device pointer
3347 *
3348 * Disable interrupts and stop the RLC (CIK).
3349 * Used for suspend.
3350 */
3351static void cik_irq_suspend(struct radeon_device *rdev)
3352{
3353 cik_irq_disable(rdev);
3354 cik_rlc_stop(rdev);
3355}
3356
3357/**
3358 * cik_irq_fini - tear down interrupt support
3359 *
3360 * @rdev: radeon_device pointer
3361 *
3362 * Disable interrupts on the hw and free the IH ring
3363 * buffer (CIK).
3364 * Used for driver unload.
3365 */
3366static void cik_irq_fini(struct radeon_device *rdev)
3367{
3368 cik_irq_suspend(rdev);
3369 r600_ih_ring_fini(rdev);
3370}
3371
3372/**
3373 * cik_get_ih_wptr - get the IH ring buffer wptr
3374 *
3375 * @rdev: radeon_device pointer
3376 *
3377 * Get the IH ring buffer wptr from either the register
3378 * or the writeback memory buffer (CIK). Also check for
3379 * ring buffer overflow and deal with it.
3380 * Used by cik_irq_process().
3381 * Returns the value of the wptr.
3382 */
3383static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
3384{
3385 u32 wptr, tmp;
3386
3387 if (rdev->wb.enabled)
3388 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3389 else
3390 wptr = RREG32(IH_RB_WPTR);
3391
3392 if (wptr & RB_OVERFLOW) {
3393	/* When a ring buffer overflow happens, start parsing interrupts
3394	 * from the last not-overwritten vector (wptr + 16). Hopefully
3395	 * this should allow us to catch up.
3396 */
3397 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
3398			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
3399 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
3400 tmp = RREG32(IH_RB_CNTL);
3401 tmp |= IH_WPTR_OVERFLOW_CLEAR;
3402 WREG32(IH_RB_CNTL, tmp);
3403 }
3404 return (wptr & rdev->ih.ptr_mask);
3405}
3406
3407/* CIK IV Ring
3408 * Each IV ring entry is 128 bits:
3409 * [7:0] - interrupt source id
3410 * [31:8] - reserved
3411 * [59:32] - interrupt source data
3412 * [63:60] - reserved
3413 * [71:64] - RINGID: ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
3414 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
3415 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
3416 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
3417 * PIPE_ID - ME0 0=3D
3418 * - ME1&2 compute dispatcher (4 pipes each)
3419 * [79:72] - VMID
3420 * [95:80] - PASID
3421 * [127:96] - reserved
3422 */
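/*
 * Illustrative sketch only: decoding the RINGID byte of an IV ring entry
 * into its ME/PIPE/QUEUE ids, matching the masks used in cik_irq_process()
 * below.  The helper name is hypothetical.
 */
static inline void cik_decode_iv_ringid(u32 ring_id, u8 *me_id, u8 *pipe_id,
					u8 *queue_id)
{
	*me_id = (ring_id & 0x60) >> 5;		/* RINGID[6:5]: ME_ID */
	*pipe_id = (ring_id & 0x18) >> 3;	/* RINGID[4:3]: PIPE_ID */
	*queue_id = ring_id & 0x7;		/* RINGID[2:0]: QUEUE_ID */
}
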
3423/**
3424 * cik_irq_process - interrupt handler
3425 *
3426 * @rdev: radeon_device pointer
3427 *
3428 * Interrupt handler (CIK). Walk the IH ring,
3429 * ack interrupts and schedule work to handle
3430 * interrupt events.
3431 * Returns irq process return code.
3432 */
3433int cik_irq_process(struct radeon_device *rdev)
3434{
3435 u32 wptr;
3436 u32 rptr;
3437 u32 src_id, src_data, ring_id;
3438 u8 me_id, pipe_id, queue_id;
3439 u32 ring_index;
3440 bool queue_hotplug = false;
3441 bool queue_reset = false;
3442
3443 if (!rdev->ih.enabled || rdev->shutdown)
3444 return IRQ_NONE;
3445
3446 wptr = cik_get_ih_wptr(rdev);
3447
3448restart_ih:
3449 /* is somebody else already processing irqs? */
3450 if (atomic_xchg(&rdev->ih.lock, 1))
3451 return IRQ_NONE;
3452
3453 rptr = rdev->ih.rptr;
3454 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3455
3456 /* Order reading of wptr vs. reading of IH ring data */
3457 rmb();
3458
3459 /* display interrupts */
3460 cik_irq_ack(rdev);
3461
3462 while (rptr != wptr) {
3463 /* wptr/rptr are in bytes! */
3464 ring_index = rptr / 4;
3465 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3466 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3467 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3468 /* XXX check the bitfield order! */
3469 me_id = (ring_id & 0x60) >> 5;
3470 pipe_id = (ring_id & 0x18) >> 3;
3471 queue_id = (ring_id & 0x7) >> 0;
3472
3473 switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 176: /* GFX RB CP_INT */
		case 177: /* GFX IB CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				/* XXX compute */
				break;
			case 2:
				/* XXX compute */
				break;
			}
			break;
		case 184: /* CP Privileged reg access */
			DRM_ERROR("Illegal register access in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need
				 * to do is soft reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				break;
			case 2:
				/* XXX compute */
				break;
			}
			break;
		case 185: /* CP Privileged inst */
			DRM_ERROR("Illegal instruction in command stream\n");
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need
				 * to do is soft reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				break;
			case 2:
				/* XXX compute */
				break;
			}
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_reset)
		schedule_work(&rdev->reset_work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = cik_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}