/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);

#define BONAIRE_IO_MC_REGS_SIZE 36
80
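/* register index/data pairs loaded into MC_SEQ_IO_DEBUG_INDEX/DATA before the MC ucode (see ci_mc_load_microcode) */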
81static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
82{
83 {0x00000070, 0x04400000},
84 {0x00000071, 0x80c01803},
85 {0x00000072, 0x00004004},
86 {0x00000073, 0x00000100},
87 {0x00000074, 0x00ff0000},
88 {0x00000075, 0x34000000},
89 {0x00000076, 0x08000014},
90 {0x00000077, 0x00cc08ec},
91 {0x00000078, 0x00000400},
92 {0x00000079, 0x00000000},
93 {0x0000007a, 0x04090000},
94 {0x0000007c, 0x00000000},
95 {0x0000007e, 0x4408a8e8},
96 {0x0000007f, 0x00000304},
97 {0x00000080, 0x00000000},
98 {0x00000082, 0x00000001},
99 {0x00000083, 0x00000002},
100 {0x00000084, 0xf3e4f400},
101 {0x00000085, 0x052024e3},
102 {0x00000087, 0x00000000},
103 {0x00000088, 0x01000000},
104 {0x0000008a, 0x1c0a0000},
105 {0x0000008b, 0xff010000},
106 {0x0000008d, 0xffffefff},
107 {0x0000008e, 0xfff3efff},
108 {0x0000008f, 0xfff3efbf},
109 {0x00000092, 0xf7ffffff},
110 {0x00000093, 0xffffff7f},
111 {0x00000095, 0x00101101},
112 {0x00000096, 0x00000fff},
113 {0x00000097, 0x00116fff},
114 {0x00000098, 0x60010000},
115 {0x00000099, 0x10010000},
116 {0x0000009a, 0x00006000},
117 {0x0000009b, 0x00001000},
118 {0x0000009f, 0x00b48000}
119};
120
121/* ucode loading */
122/**
123 * ci_mc_load_microcode - load MC ucode into the hw
124 *
125 * @rdev: radeon_device pointer
126 *
127 * Load the GDDR MC ucode into the hw (CIK).
128 * Returns 0 on success, error on failure.
129 */
130static int ci_mc_load_microcode(struct radeon_device *rdev)
131{
132 const __be32 *fw_data;
133 u32 running, blackout = 0;
134 u32 *io_mc_regs;
135 int i, ucode_size, regs_size;
136
137 if (!rdev->mc_fw)
138 return -EINVAL;
139
140 switch (rdev->family) {
141 case CHIP_BONAIRE:
142 default:
143 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
144 ucode_size = CIK_MC_UCODE_SIZE;
145 regs_size = BONAIRE_IO_MC_REGS_SIZE;
146 break;
147 }
148
149 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
150
151 if (running == 0) {
152 if (running) {
153 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
154 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
155 }
156
157 /* reset the engine and set to writable */
158 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
159 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
160
161 /* load mc io regs */
162 for (i = 0; i < regs_size; i++) {
163 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
164 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
165 }
166 /* load the MC ucode */
167 fw_data = (const __be32 *)rdev->mc_fw->data;
168 for (i = 0; i < ucode_size; i++)
169 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
170
171 /* put the engine back into the active state */
172 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
173 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
174 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
175
176 /* wait for training to complete */
177 for (i = 0; i < rdev->usec_timeout; i++) {
178 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
179 break;
180 udelay(1);
181 }
182 for (i = 0; i < rdev->usec_timeout; i++) {
183 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
184 break;
185 udelay(1);
186 }
187
188 if (running)
189 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
190 }
191
192 return 0;
193}
194
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
204static int cik_init_microcode(struct radeon_device *rdev)
205{
206 struct platform_device *pdev;
207 const char *chip_name;
208 size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
212 int err;
213
214 DRM_DEBUG("\n");
215
216 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
217 err = IS_ERR(pdev);
218 if (err) {
219 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
220 return -EINVAL;
221 }
222
223 switch (rdev->family) {
224 case CHIP_BONAIRE:
225 chip_name = "BONAIRE";
226 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
227 me_req_size = CIK_ME_UCODE_SIZE * 4;
228 ce_req_size = CIK_CE_UCODE_SIZE * 4;
229 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
230 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
231 mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
234 case CHIP_KAVERI:
235 chip_name = "KAVERI";
236 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
237 me_req_size = CIK_ME_UCODE_SIZE * 4;
238 ce_req_size = CIK_CE_UCODE_SIZE * 4;
239 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
240 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
243 case CHIP_KABINI:
244 chip_name = "KABINI";
245 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
246 me_req_size = CIK_ME_UCODE_SIZE * 4;
247 ce_req_size = CIK_CE_UCODE_SIZE * 4;
248 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
249 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
252 default: BUG();
253 }
254
255 DRM_INFO("Loading %s Microcode\n", chip_name);
256
257 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
258 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
259 if (err)
260 goto out;
261 if (rdev->pfp_fw->size != pfp_req_size) {
262 printk(KERN_ERR
263 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
264 rdev->pfp_fw->size, fw_name);
265 err = -EINVAL;
266 goto out;
267 }
268
269 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
270 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
271 if (err)
272 goto out;
273 if (rdev->me_fw->size != me_req_size) {
274 printk(KERN_ERR
275 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
276 rdev->me_fw->size, fw_name);
277 err = -EINVAL;
278 }
279
280 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
281 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
282 if (err)
283 goto out;
284 if (rdev->ce_fw->size != ce_req_size) {
285 printk(KERN_ERR
286 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
287 rdev->ce_fw->size, fw_name);
288 err = -EINVAL;
289 }
290
291 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
292 err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
293 if (err)
294 goto out;
295 if (rdev->mec_fw->size != mec_req_size) {
296 printk(KERN_ERR
297 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
298 rdev->mec_fw->size, fw_name);
299 err = -EINVAL;
300 }
301
302 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
303 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
304 if (err)
305 goto out;
306 if (rdev->rlc_fw->size != rlc_req_size) {
307 printk(KERN_ERR
308 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
309 rdev->rlc_fw->size, fw_name);
310 err = -EINVAL;
311 }
312
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
314 err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
315 if (err)
316 goto out;
317 if (rdev->sdma_fw->size != sdma_req_size) {
318 printk(KERN_ERR
319 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
320 rdev->sdma_fw->size, fw_name);
321 err = -EINVAL;
322 }
323
	/* No MC ucode on APUs */
325 if (!(rdev->flags & RADEON_IS_IGP)) {
326 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
327 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
328 if (err)
329 goto out;
330 if (rdev->mc_fw->size != mc_req_size) {
331 printk(KERN_ERR
332 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
333 rdev->mc_fw->size, fw_name);
334 err = -EINVAL;
335 }
336 }
337
338out:
339 platform_device_unregister(pdev);
340
341 if (err) {
342 if (err != -EINVAL)
343 printk(KERN_ERR
344 "cik_cp: Failed to load firmware \"%s\"\n",
345 fw_name);
346 release_firmware(rdev->pfp_fw);
347 rdev->pfp_fw = NULL;
348 release_firmware(rdev->me_fw);
349 rdev->me_fw = NULL;
350 release_firmware(rdev->ce_fw);
351 rdev->ce_fw = NULL;
352 release_firmware(rdev->rlc_fw);
353 rdev->rlc_fw = NULL;
354 release_firmware(rdev->mc_fw);
355 rdev->mc_fw = NULL;
356 }
357 return err;
358}
359
/*
 * Core functions
 */
363/**
364 * cik_tiling_mode_table_init - init the hw tiling table
365 *
366 * @rdev: radeon_device pointer
367 *
368 * Starting with SI, the tiling setup is done globally in a
369 * set of 32 tiling modes. Rather than selecting each set of
370 * parameters per surface as on older asics, we just select
371 * which index in the tiling table we want to use, and the
372 * surface uses those parameters (CIK).
373 */
374static void cik_tiling_mode_table_init(struct radeon_device *rdev)
375{
376 const u32 num_tile_mode_states = 32;
377 const u32 num_secondary_tile_mode_states = 16;
378 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
379 u32 num_pipe_configs;
380 u32 num_rbs = rdev->config.cik.max_backends_per_se *
381 rdev->config.cik.max_shader_engines;
382
383 switch (rdev->config.cik.mem_row_size_in_kb) {
384 case 1:
385 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
386 break;
387 case 2:
388 default:
389 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
390 break;
391 case 4:
392 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
393 break;
394 }
395
396 num_pipe_configs = rdev->config.cik.max_tile_pipes;
397 if (num_pipe_configs > 8)
398 num_pipe_configs = 8; /* ??? */
399
400 if (num_pipe_configs == 8) {
401 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
402 switch (reg_offset) {
403 case 0:
404 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
406 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
408 break;
409 case 1:
410 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
412 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
413 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
414 break;
415 case 2:
416 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
417 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
418 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
420 break;
421 case 3:
422 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
424 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
425 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
426 break;
427 case 4:
428 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
429 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
430 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
431 TILE_SPLIT(split_equal_to_row_size));
432 break;
433 case 5:
434 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
435 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
436 break;
437 case 6:
438 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
439 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
440 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
441 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
442 break;
443 case 7:
444 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
446 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
447 TILE_SPLIT(split_equal_to_row_size));
448 break;
449 case 8:
450 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
451 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
452 break;
453 case 9:
454 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
455 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
456 break;
457 case 10:
458 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
459 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
460 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
462 break;
463 case 11:
464 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
465 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
466 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
468 break;
469 case 12:
470 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
471 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
472 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
474 break;
475 case 13:
476 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
477 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
478 break;
479 case 14:
480 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
481 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
482 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
484 break;
485 case 16:
486 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
487 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
488 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
490 break;
491 case 17:
492 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
493 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
494 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
496 break;
497 case 27:
498 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
499 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
500 break;
501 case 28:
502 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
503 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
504 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
506 break;
507 case 29:
508 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
509 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
510 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
512 break;
513 case 30:
514 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
515 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
516 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
517 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
518 break;
519 default:
520 gb_tile_moden = 0;
521 break;
522 }
523 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
524 }
525 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
526 switch (reg_offset) {
527 case 0:
528 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
531 NUM_BANKS(ADDR_SURF_16_BANK));
532 break;
533 case 1:
534 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
535 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
536 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
537 NUM_BANKS(ADDR_SURF_16_BANK));
538 break;
539 case 2:
540 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
543 NUM_BANKS(ADDR_SURF_16_BANK));
544 break;
545 case 3:
546 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
547 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
548 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
549 NUM_BANKS(ADDR_SURF_16_BANK));
550 break;
551 case 4:
552 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
555 NUM_BANKS(ADDR_SURF_8_BANK));
556 break;
557 case 5:
558 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
559 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
560 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
561 NUM_BANKS(ADDR_SURF_4_BANK));
562 break;
563 case 6:
564 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
565 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
566 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
567 NUM_BANKS(ADDR_SURF_2_BANK));
568 break;
569 case 8:
570 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
571 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
572 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
573 NUM_BANKS(ADDR_SURF_16_BANK));
574 break;
575 case 9:
576 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
577 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
578 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
579 NUM_BANKS(ADDR_SURF_16_BANK));
580 break;
581 case 10:
582 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
583 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
584 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
585 NUM_BANKS(ADDR_SURF_16_BANK));
586 break;
587 case 11:
588 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
589 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
590 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
591 NUM_BANKS(ADDR_SURF_16_BANK));
592 break;
593 case 12:
594 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
595 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
596 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
597 NUM_BANKS(ADDR_SURF_8_BANK));
598 break;
599 case 13:
600 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
601 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
602 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
603 NUM_BANKS(ADDR_SURF_4_BANK));
604 break;
605 case 14:
606 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
607 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
608 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
609 NUM_BANKS(ADDR_SURF_2_BANK));
610 break;
611 default:
612 gb_tile_moden = 0;
613 break;
614 }
615 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
616 }
617 } else if (num_pipe_configs == 4) {
618 if (num_rbs == 4) {
619 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
620 switch (reg_offset) {
621 case 0:
622 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
623 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
624 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
625 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
626 break;
627 case 1:
628 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
629 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
630 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
631 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
632 break;
633 case 2:
634 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
636 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
637 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
638 break;
639 case 3:
640 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
641 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
642 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
643 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
644 break;
645 case 4:
646 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
647 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
648 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
649 TILE_SPLIT(split_equal_to_row_size));
650 break;
651 case 5:
652 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
653 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
654 break;
655 case 6:
656 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
657 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
658 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
659 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
660 break;
661 case 7:
662 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
663 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
664 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
665 TILE_SPLIT(split_equal_to_row_size));
666 break;
667 case 8:
668 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
669 PIPE_CONFIG(ADDR_SURF_P4_16x16));
670 break;
671 case 9:
672 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
673 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
674 break;
675 case 10:
676 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
677 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
678 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
679 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
680 break;
681 case 11:
682 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
683 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
684 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
686 break;
687 case 12:
688 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
689 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
690 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
692 break;
693 case 13:
694 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
695 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
696 break;
697 case 14:
698 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
699 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
700 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
702 break;
703 case 16:
704 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
705 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
706 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
707 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
708 break;
709 case 17:
710 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
711 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
712 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
714 break;
715 case 27:
716 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
717 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
718 break;
719 case 28:
720 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
721 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
722 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
723 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
724 break;
725 case 29:
726 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
727 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
728 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
730 break;
731 case 30:
732 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
733 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
734 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
736 break;
737 default:
738 gb_tile_moden = 0;
739 break;
740 }
741 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
742 }
743 } else if (num_rbs < 4) {
744 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
745 switch (reg_offset) {
746 case 0:
747 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
748 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
749 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
750 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
751 break;
752 case 1:
753 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
754 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
755 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
756 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
757 break;
758 case 2:
759 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
760 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
761 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
762 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
763 break;
764 case 3:
765 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
766 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
767 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
768 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
769 break;
770 case 4:
771 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
772 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
773 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
774 TILE_SPLIT(split_equal_to_row_size));
775 break;
776 case 5:
777 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
778 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
779 break;
780 case 6:
781 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
782 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
783 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
784 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
785 break;
786 case 7:
787 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
788 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
789 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
790 TILE_SPLIT(split_equal_to_row_size));
791 break;
792 case 8:
793 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
794 PIPE_CONFIG(ADDR_SURF_P4_8x16));
795 break;
796 case 9:
797 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
798 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
799 break;
800 case 10:
801 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
802 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
803 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
804 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
805 break;
806 case 11:
807 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
808 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
809 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
810 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
811 break;
812 case 12:
813 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
814 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
815 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
816 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
817 break;
818 case 13:
819 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
820 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
821 break;
822 case 14:
823 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
824 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
825 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
826 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
827 break;
828 case 16:
829 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
830 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
831 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
832 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
833 break;
834 case 17:
835 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
836 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
837 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
838 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
839 break;
840 case 27:
841 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
842 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
843 break;
844 case 28:
845 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
846 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
847 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
848 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
849 break;
850 case 29:
851 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
852 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
853 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
855 break;
856 case 30:
857 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
858 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
859 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
860 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
861 break;
862 default:
863 gb_tile_moden = 0;
864 break;
865 }
866 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
867 }
868 }
869 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
870 switch (reg_offset) {
871 case 0:
872 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
873 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
874 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
875 NUM_BANKS(ADDR_SURF_16_BANK));
876 break;
877 case 1:
878 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
879 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
880 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
881 NUM_BANKS(ADDR_SURF_16_BANK));
882 break;
883 case 2:
884 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
885 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
886 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
887 NUM_BANKS(ADDR_SURF_16_BANK));
888 break;
889 case 3:
890 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
891 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
892 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
893 NUM_BANKS(ADDR_SURF_16_BANK));
894 break;
895 case 4:
896 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
897 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
898 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
899 NUM_BANKS(ADDR_SURF_16_BANK));
900 break;
901 case 5:
902 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
903 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
904 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
905 NUM_BANKS(ADDR_SURF_8_BANK));
906 break;
907 case 6:
908 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
909 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
910 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
911 NUM_BANKS(ADDR_SURF_4_BANK));
912 break;
913 case 8:
914 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
915 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
916 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
917 NUM_BANKS(ADDR_SURF_16_BANK));
918 break;
919 case 9:
920 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
921 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
922 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
923 NUM_BANKS(ADDR_SURF_16_BANK));
924 break;
925 case 10:
926 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
929 NUM_BANKS(ADDR_SURF_16_BANK));
930 break;
931 case 11:
932 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
933 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
934 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
935 NUM_BANKS(ADDR_SURF_16_BANK));
936 break;
937 case 12:
938 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
941 NUM_BANKS(ADDR_SURF_16_BANK));
942 break;
943 case 13:
944 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
945 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
947 NUM_BANKS(ADDR_SURF_8_BANK));
948 break;
949 case 14:
950 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
951 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
952 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
953 NUM_BANKS(ADDR_SURF_4_BANK));
954 break;
955 default:
956 gb_tile_moden = 0;
957 break;
958 }
959 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
960 }
961 } else if (num_pipe_configs == 2) {
962 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
963 switch (reg_offset) {
964 case 0:
965 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
966 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
967 PIPE_CONFIG(ADDR_SURF_P2) |
968 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
969 break;
970 case 1:
971 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
972 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
973 PIPE_CONFIG(ADDR_SURF_P2) |
974 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
975 break;
976 case 2:
977 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
979 PIPE_CONFIG(ADDR_SURF_P2) |
980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
981 break;
982 case 3:
983 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
984 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
985 PIPE_CONFIG(ADDR_SURF_P2) |
986 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
987 break;
988 case 4:
989 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
990 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
991 PIPE_CONFIG(ADDR_SURF_P2) |
992 TILE_SPLIT(split_equal_to_row_size));
993 break;
994 case 5:
995 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
996 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
997 break;
998 case 6:
999 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1000 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1001 PIPE_CONFIG(ADDR_SURF_P2) |
1002 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1003 break;
1004 case 7:
1005 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1006 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1007 PIPE_CONFIG(ADDR_SURF_P2) |
1008 TILE_SPLIT(split_equal_to_row_size));
1009 break;
1010 case 8:
1011 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1012 break;
1013 case 9:
1014 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1015 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1016 break;
1017 case 10:
1018 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1019 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1020 PIPE_CONFIG(ADDR_SURF_P2) |
1021 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1022 break;
1023 case 11:
1024 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1025 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1026 PIPE_CONFIG(ADDR_SURF_P2) |
1027 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1028 break;
1029 case 12:
1030 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1031 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1032 PIPE_CONFIG(ADDR_SURF_P2) |
1033 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1034 break;
1035 case 13:
1036 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1037 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1038 break;
1039 case 14:
1040 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1041 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1042 PIPE_CONFIG(ADDR_SURF_P2) |
1043 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1044 break;
1045 case 16:
1046 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1047 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1048 PIPE_CONFIG(ADDR_SURF_P2) |
1049 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1050 break;
1051 case 17:
1052 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1053 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1054 PIPE_CONFIG(ADDR_SURF_P2) |
1055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1056 break;
1057 case 27:
1058 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1059 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1060 break;
1061 case 28:
1062 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1063 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1064 PIPE_CONFIG(ADDR_SURF_P2) |
1065 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1066 break;
1067 case 29:
1068 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1069 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1070 PIPE_CONFIG(ADDR_SURF_P2) |
1071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1072 break;
1073 case 30:
1074 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1075 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1076 PIPE_CONFIG(ADDR_SURF_P2) |
1077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1078 break;
1079 default:
1080 gb_tile_moden = 0;
1081 break;
1082 }
1083 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1084 }
1085 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1086 switch (reg_offset) {
1087 case 0:
1088 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1091 NUM_BANKS(ADDR_SURF_16_BANK));
1092 break;
1093 case 1:
1094 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1095 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1096 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1097 NUM_BANKS(ADDR_SURF_16_BANK));
1098 break;
1099 case 2:
1100 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1101 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1102 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1103 NUM_BANKS(ADDR_SURF_16_BANK));
1104 break;
1105 case 3:
1106 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1107 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1108 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1109 NUM_BANKS(ADDR_SURF_16_BANK));
1110 break;
1111 case 4:
1112 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1115 NUM_BANKS(ADDR_SURF_16_BANK));
1116 break;
1117 case 5:
1118 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1121 NUM_BANKS(ADDR_SURF_16_BANK));
1122 break;
1123 case 6:
1124 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1125 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1126 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1127 NUM_BANKS(ADDR_SURF_8_BANK));
1128 break;
1129 case 8:
1130 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1133 NUM_BANKS(ADDR_SURF_16_BANK));
1134 break;
1135 case 9:
1136 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1138 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1139 NUM_BANKS(ADDR_SURF_16_BANK));
1140 break;
1141 case 10:
1142 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1145 NUM_BANKS(ADDR_SURF_16_BANK));
1146 break;
1147 case 11:
1148 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1151 NUM_BANKS(ADDR_SURF_16_BANK));
1152 break;
1153 case 12:
1154 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1155 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1156 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1157 NUM_BANKS(ADDR_SURF_16_BANK));
1158 break;
1159 case 13:
1160 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1163 NUM_BANKS(ADDR_SURF_16_BANK));
1164 break;
1165 case 14:
1166 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1169 NUM_BANKS(ADDR_SURF_8_BANK));
1170 break;
1171 default:
1172 gb_tile_moden = 0;
1173 break;
1174 }
1175 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1176 }
1177 } else
1178 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1179}
1180
1181/**
1182 * cik_select_se_sh - select which SE, SH to address
1183 *
1184 * @rdev: radeon_device pointer
1185 * @se_num: shader engine to address
1186 * @sh_num: sh block to address
1187 *
1188 * Select which SE, SH combinations to address. Certain
1189 * registers are instanced per SE or SH. 0xffffffff means
1190 * broadcast to all SEs or SHs (CIK).
1191 */
1192static void cik_select_se_sh(struct radeon_device *rdev,
1193 u32 se_num, u32 sh_num)
1194{
1195 u32 data = INSTANCE_BROADCAST_WRITES;
1196
1197 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1198 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1199 else if (se_num == 0xffffffff)
1200 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1201 else if (sh_num == 0xffffffff)
1202 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1203 else
1204 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1205 WREG32(GRBM_GFX_INDEX, data);
1206}
1207
1208/**
1209 * cik_create_bitmask - create a bitmask
1210 *
1211 * @bit_width: length of the mask
1212 *
1213 * create a variable length bit mask (CIK).
1214 * Returns the bitmask.
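 * For example, bit_width = 4 yields a mask of 0xf.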
1215 */
1216static u32 cik_create_bitmask(u32 bit_width)
1217{
1218 u32 i, mask = 0;
1219
1220 for (i = 0; i < bit_width; i++) {
1221 mask <<= 1;
1222 mask |= 1;
1223 }
1224 return mask;
1225}
1226
/**
 * cik_get_rb_disabled - compute the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
1238static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1239 u32 max_rb_num, u32 se_num,
1240 u32 sh_per_se)
1241{
1242 u32 data, mask;
1243
1244 data = RREG32(CC_RB_BACKEND_DISABLE);
1245 if (data & 1)
1246 data &= BACKEND_DISABLE_MASK;
1247 else
1248 data = 0;
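	/* also include RBs disabled via GC_USER_RB_BACKEND_DISABLE */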
1249 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1250
1251 data >>= BACKEND_DISABLE_SHIFT;
1252
1253 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1254
1255 return data & mask;
1256}
1257
1258/**
1259 * cik_setup_rb - setup the RBs on the asic
1260 *
1261 * @rdev: radeon_device pointer
1262 * @se_num: number of SEs (shader engines) for the asic
1263 * @sh_per_se: number of SH blocks per SE for the asic
1264 * @max_rb_num: max RBs (render backends) for the asic
1265 *
1266 * Configures per-SE/SH RB registers (CIK).
1267 */
1268static void cik_setup_rb(struct radeon_device *rdev,
1269 u32 se_num, u32 sh_per_se,
1270 u32 max_rb_num)
1271{
1272 int i, j;
1273 u32 data, mask;
1274 u32 disabled_rbs = 0;
1275 u32 enabled_rbs = 0;
1276
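	/* gather the per-SE/SH disabled RB bits into one packed bitmap */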
1277 for (i = 0; i < se_num; i++) {
1278 for (j = 0; j < sh_per_se; j++) {
1279 cik_select_se_sh(rdev, i, j);
1280 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1281 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1282 }
1283 }
1284 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1285
1286 mask = 1;
1287 for (i = 0; i < max_rb_num; i++) {
1288 if (!(disabled_rbs & mask))
1289 enabled_rbs |= mask;
1290 mask <<= 1;
1291 }
1292
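	/* program each SE's RASTER_CONFIG, consuming two bits of the enabled RB map per SH */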
1293 for (i = 0; i < se_num; i++) {
1294 cik_select_se_sh(rdev, i, 0xffffffff);
1295 data = 0;
1296 for (j = 0; j < sh_per_se; j++) {
1297 switch (enabled_rbs & 3) {
1298 case 1:
1299 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1300 break;
1301 case 2:
1302 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1303 break;
1304 case 3:
1305 default:
1306 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1307 break;
1308 }
1309 enabled_rbs >>= 2;
1310 }
1311 WREG32(PA_SC_RASTER_CONFIG, data);
1312 }
1313 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1314}
1315
1316/**
1317 * cik_gpu_init - setup the 3D engine
1318 *
1319 * @rdev: radeon_device pointer
1320 *
1321 * Configures the 3D engine and tiling configuration
1322 * registers so that the 3D engine is usable.
1323 */
1324static void cik_gpu_init(struct radeon_device *rdev)
1325{
1326 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1327 u32 mc_shared_chmap, mc_arb_ramcfg;
1328 u32 hdp_host_path_cntl;
1329 u32 tmp;
1330 int i, j;
1331
1332 switch (rdev->family) {
1333 case CHIP_BONAIRE:
1334 rdev->config.cik.max_shader_engines = 2;
1335 rdev->config.cik.max_tile_pipes = 4;
1336 rdev->config.cik.max_cu_per_sh = 7;
1337 rdev->config.cik.max_sh_per_se = 1;
1338 rdev->config.cik.max_backends_per_se = 2;
1339 rdev->config.cik.max_texture_channel_caches = 4;
1340 rdev->config.cik.max_gprs = 256;
1341 rdev->config.cik.max_gs_threads = 32;
1342 rdev->config.cik.max_hw_contexts = 8;
1343
1344 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1345 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1346 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1347 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1348 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1349 break;
1350 case CHIP_KAVERI:
1351 /* TODO */
1352 break;
1353 case CHIP_KABINI:
1354 default:
1355 rdev->config.cik.max_shader_engines = 1;
1356 rdev->config.cik.max_tile_pipes = 2;
1357 rdev->config.cik.max_cu_per_sh = 2;
1358 rdev->config.cik.max_sh_per_se = 1;
1359 rdev->config.cik.max_backends_per_se = 1;
1360 rdev->config.cik.max_texture_channel_caches = 2;
1361 rdev->config.cik.max_gprs = 256;
1362 rdev->config.cik.max_gs_threads = 16;
1363 rdev->config.cik.max_hw_contexts = 8;
1364
1365 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1366 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1367 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1368 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1369 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1370 break;
1371 }
1372
1373 /* Initialize HDP */
1374 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1375 WREG32((0x2c14 + j), 0x00000000);
1376 WREG32((0x2c18 + j), 0x00000000);
1377 WREG32((0x2c1c + j), 0x00000000);
1378 WREG32((0x2c20 + j), 0x00000000);
1379 WREG32((0x2c24 + j), 0x00000000);
1380 }
1381
1382 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1383
1384 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1385
1386 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1387 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1388
1389 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1390 rdev->config.cik.mem_max_burst_length_bytes = 256;
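	/* row size in KB = 4 bytes per column * 2^(8 + NOOFCOLS) columns / 1024 */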
1391 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1392 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1393 if (rdev->config.cik.mem_row_size_in_kb > 4)
1394 rdev->config.cik.mem_row_size_in_kb = 4;
1395 /* XXX use MC settings? */
1396 rdev->config.cik.shader_engine_tile_size = 32;
1397 rdev->config.cik.num_gpus = 1;
1398 rdev->config.cik.multi_gpu_tile_size = 64;
1399
1400 /* fix up row size */
1401 gb_addr_config &= ~ROW_SIZE_MASK;
1402 switch (rdev->config.cik.mem_row_size_in_kb) {
1403 case 1:
1404 default:
1405 gb_addr_config |= ROW_SIZE(0);
1406 break;
1407 case 2:
1408 gb_addr_config |= ROW_SIZE(1);
1409 break;
1410 case 4:
1411 gb_addr_config |= ROW_SIZE(2);
1412 break;
1413 }
1414
1415 /* setup tiling info dword. gb_addr_config is not adequate since it does
1416 * not have bank info, so create a custom tiling dword.
1417 * bits 3:0 num_pipes
1418 * bits 7:4 num_banks
1419 * bits 11:8 group_size
1420 * bits 15:12 row_size
1421 */
1422 rdev->config.cik.tile_config = 0;
1423 switch (rdev->config.cik.num_tile_pipes) {
1424 case 1:
1425 rdev->config.cik.tile_config |= (0 << 0);
1426 break;
1427 case 2:
1428 rdev->config.cik.tile_config |= (1 << 0);
1429 break;
1430 case 4:
1431 rdev->config.cik.tile_config |= (2 << 0);
1432 break;
1433 case 8:
1434 default:
1435 /* XXX what about 12? */
1436 rdev->config.cik.tile_config |= (3 << 0);
1437 break;
1438 }
1439 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1440 rdev->config.cik.tile_config |= 1 << 4;
1441 else
1442 rdev->config.cik.tile_config |= 0 << 4;
1443 rdev->config.cik.tile_config |=
1444 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1445 rdev->config.cik.tile_config |=
1446 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1447
1448 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1449 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1450 WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);

1454 cik_tiling_mode_table_init(rdev);
1455
1456 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1457 rdev->config.cik.max_sh_per_se,
1458 rdev->config.cik.max_backends_per_se);
1459
1460 /* set HW defaults for 3D engine */
1461 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1462
1463 WREG32(SX_DEBUG_1, 0x20);
1464
1465 WREG32(TA_CNTL_AUX, 0x00010000);
1466
1467 tmp = RREG32(SPI_CONFIG_CNTL);
1468 tmp |= 0x03000000;
1469 WREG32(SPI_CONFIG_CNTL, tmp);
1470
1471 WREG32(SQ_CONFIG, 1);
1472
1473 WREG32(DB_DEBUG, 0);
1474
1475 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1476 tmp |= 0x00000400;
1477 WREG32(DB_DEBUG2, tmp);
1478
1479 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
1480 tmp |= 0x00020200;
1481 WREG32(DB_DEBUG3, tmp);
1482
1483 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
1484 tmp |= 0x00018208;
1485 WREG32(CB_HW_CONTROL, tmp);
1486
1487 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1488
1489 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
1490 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
1491 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
1492 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
1493
1494 WREG32(VGT_NUM_INSTANCES, 1);
1495
1496 WREG32(CP_PERFMON_CNTL, 0);
1497
1498 WREG32(SQ_CONFIG, 0);
1499
1500 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1501 FORCE_EOV_MAX_REZ_CNT(255)));
1502
1503 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1504 AUTO_INVLD_EN(ES_AND_GS_AUTO));
1505
1506 WREG32(VGT_GS_VERTEX_REUSE, 16);
1507 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1508
1509 tmp = RREG32(HDP_MISC_CNTL);
1510 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1511 WREG32(HDP_MISC_CNTL, tmp);
1512
1513 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1514 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1515
1516 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1517 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
1518
1519 udelay(50);
1520}
1521
/*
 * GPU scratch registers helper functions.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+). On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
1535static void cik_scratch_init(struct radeon_device *rdev)
1536{
1537 int i;
1538
1539 rdev->scratch.num_reg = 7;
1540 rdev->scratch.reg_base = SCRATCH_REG0;
1541 for (i = 0; i < rdev->scratch.num_reg; i++) {
1542 rdev->scratch.free[i] = true;
1543 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1544 }
1545}
1546
/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
 * Returns 0 on success, error on failure.
 */
1558int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
1559{
1560 uint32_t scratch;
1561 uint32_t tmp = 0;
1562 unsigned i;
1563 int r;
1564
1565 r = radeon_scratch_get(rdev, &scratch);
1566 if (r) {
1567 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
1568 return r;
1569 }
1570 WREG32(scratch, 0xCAFEDEAD);
1571 r = radeon_ring_lock(rdev, ring, 3);
1572 if (r) {
1573 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
1574 radeon_scratch_free(rdev, scratch);
1575 return r;
1576 }
1577 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1578 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
1579 radeon_ring_write(ring, 0xDEADBEEF);
1580 radeon_ring_unlock_commit(rdev, ring);
1581 for (i = 0; i < rdev->usec_timeout; i++) {
1582 tmp = RREG32(scratch);
1583 if (tmp == 0xDEADBEEF)
1584 break;
1585 DRM_UDELAY(1);
1586 }
1587 if (i < rdev->usec_timeout) {
1588 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
1589 } else {
1590 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
1591 ring->idx, scratch, tmp);
1592 r = -EINVAL;
1593 }
1594 radeon_scratch_free(rdev, scratch);
1595 return r;
1596}
1597
/**
 * cik_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
1607void cik_fence_ring_emit(struct radeon_device *rdev,
1608 struct radeon_fence *fence)
1609{
1610 struct radeon_ring *ring = &rdev->ring[fence->ring];
1611 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1612
1613 /* EVENT_WRITE_EOP - flush caches, send int */
1614 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1615 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
1616 EOP_TC_ACTION_EN |
1617 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
1618 EVENT_INDEX(5)));
1619 radeon_ring_write(ring, addr & 0xfffffffc);
1620 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
1621 radeon_ring_write(ring, fence->seq);
1622 radeon_ring_write(ring, 0);
1623 /* HDP flush */
1624 /* We should be using the new WAIT_REG_MEM special op packet here
1625 * but it causes the CP to hang
1626 */
1627 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1628 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
1629 WRITE_DATA_DST_SEL(0)));
1630 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
1631 radeon_ring_write(ring, 0);
1632 radeon_ring_write(ring, 0);
1633}
1634
1635void cik_semaphore_ring_emit(struct radeon_device *rdev,
1636 struct radeon_ring *ring,
1637 struct radeon_semaphore *semaphore,
1638 bool emit_wait)
1639{
1640 uint64_t addr = semaphore->gpu_addr;
1641 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
1642
1643 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
1644 radeon_ring_write(ring, addr & 0xffffffff);
1645 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
1646}
1647
1648/*
1649 * IB stuff
1650 */
1651/**
1652 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
1653 *
1654 * @rdev: radeon_device pointer
1655 * @ib: radeon indirect buffer object
1656 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring. IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring. This function schedules the IB
 * on the gfx ring for execution by the GPU.
1662 */
1663void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1664{
1665 struct radeon_ring *ring = &rdev->ring[ib->ring];
1666 u32 header, control = INDIRECT_BUFFER_VALID;
1667
1668 if (ib->is_const_ib) {
1669 /* set switch buffer packet before const IB */
1670 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
1671 radeon_ring_write(ring, 0);
1672
1673 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1674 } else {
1675 u32 next_rptr;
1676 if (ring->rptr_save_reg) {
1677 next_rptr = ring->wptr + 3 + 4;
1678 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1679 radeon_ring_write(ring, ((ring->rptr_save_reg -
1680 PACKET3_SET_UCONFIG_REG_START) >> 2));
1681 radeon_ring_write(ring, next_rptr);
1682 } else if (rdev->wb.enabled) {
1683 next_rptr = ring->wptr + 5 + 4;
1684 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1685 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
1686 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1687 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1688 radeon_ring_write(ring, next_rptr);
1689 }
1690
1691 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1692 }
1693
1694 control |= ib->length_dw |
1695 (ib->vm ? (ib->vm->id << 24) : 0);
1696
1697 radeon_ring_write(ring, header);
1698 radeon_ring_write(ring,
1699#ifdef __BIG_ENDIAN
1700 (2 << 0) |
1701#endif
1702 (ib->gpu_addr & 0xFFFFFFFC));
1703 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1704 radeon_ring_write(ring, control);
1705}
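
/*
 * Illustrative summary only (not driver logic): the DE IB submission
 * emitted above ends up as a single 4 DW PM4 packet on the ring:
 *
 *	PACKET3(PACKET3_INDIRECT_BUFFER, 2)
 *	ib->gpu_addr & 0xFFFFFFFC		- IB base, bits 31:2
 *	upper_32_bits(ib->gpu_addr) & 0xFFFF	- IB base, bits 47:32
 *	INDIRECT_BUFFER_VALID | length_dw | (vm id << 24)
 *
 * Const IBs use PACKET3_INDIRECT_BUFFER_CONST with the same layout,
 * preceded by a SWITCH_BUFFER packet.
 */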
1706
1707/**
1708 * cik_ib_test - basic gfx ring IB test
1709 *
1710 * @rdev: radeon_device pointer
1711 * @ring: radeon_ring structure holding ring information
1712 *
1713 * Allocate an IB and execute it on the gfx ring (CIK).
1714 * Provides a basic gfx ring test to verify that IBs are working.
1715 * Returns 0 on success, error on failure.
1716 */
1717int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
1718{
1719 struct radeon_ib ib;
1720 uint32_t scratch;
1721 uint32_t tmp = 0;
1722 unsigned i;
1723 int r;
1724
1725 r = radeon_scratch_get(rdev, &scratch);
1726 if (r) {
1727 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
1728 return r;
1729 }
1730 WREG32(scratch, 0xCAFEDEAD);
1731 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
1732	if (r) {
1733		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
1734		return r;
1735	}
1736 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
1737 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
1738 ib.ptr[2] = 0xDEADBEEF;
1739 ib.length_dw = 3;
1740 r = radeon_ib_schedule(rdev, &ib, NULL);
1741 if (r) {
1742 radeon_scratch_free(rdev, scratch);
1743 radeon_ib_free(rdev, &ib);
1744 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
1745 return r;
1746 }
1747 r = radeon_fence_wait(ib.fence, false);
1748	if (r) {
1749		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
1750		return r;
1751	}
1752 for (i = 0; i < rdev->usec_timeout; i++) {
1753 tmp = RREG32(scratch);
1754 if (tmp == 0xDEADBEEF)
1755 break;
1756 DRM_UDELAY(1);
1757 }
1758 if (i < rdev->usec_timeout) {
1759 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
1760 } else {
1761 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
1762 scratch, tmp);
1763 r = -EINVAL;
1764 }
1765 radeon_scratch_free(rdev, scratch);
1766 radeon_ib_free(rdev, &ib);
1767 return r;
1768}
1769
1770/*
1771 * CP.
1772 * On CIK, gfx and compute now have independent command processors.
1773 *
1774 * GFX
1775 * Gfx consists of a single ring and can process both gfx jobs and
1776 * compute jobs. The gfx CP consists of three microengines (ME):
1777 * PFP - Pre-Fetch Parser
1778 * ME - Micro Engine
1779 * CE - Constant Engine
1780 * The PFP and ME make up what is considered the Drawing Engine (DE).
1781 * The CE is an asynchronous engine used for updating buffer descriptors
1782 * used by the DE so that they can be loaded into cache in parallel
1783 * while the DE is processing state update packets.
1784 *
1785 * Compute
1786 * The compute CP consists of two microengines (ME):
1787 * MEC1 - Compute MicroEngine 1
1788 * MEC2 - Compute MicroEngine 2
1789 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
1790 * The queues are exposed to userspace and are programmed directly
1791 * by the compute runtime.
1792 */
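/*
 * Illustrative sketch only, based on cik_cp_resume() further down in this
 * file; the CP bring-up order is roughly:
 *
 *	soft reset the CP blocks via GRBM_SOFT_RESET
 *	cik_cp_load_microcode(rdev);	- PFP/CE/ME and MEC ucode
 *	cik_cp_gfx_resume(rdev);	- program ring 0, run cik_cp_gfx_start()
 *	cik_cp_compute_resume(rdev);	- compute queues (still a todo here)
 */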
1793/**
1794 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
1795 *
1796 * @rdev: radeon_device pointer
1797 * @enable: enable or disable the MEs
1798 *
1799 * Halts or unhalts the gfx MEs.
1800 */
1801static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
1802{
1803 if (enable)
1804 WREG32(CP_ME_CNTL, 0);
1805 else {
1806 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1807 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1808 }
1809 udelay(50);
1810}
1811
1812/**
1813 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
1814 *
1815 * @rdev: radeon_device pointer
1816 *
1817 * Loads the gfx PFP, ME, and CE ucode.
1818 * Returns 0 for success, -EINVAL if the ucode is not available.
1819 */
1820static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
1821{
1822 const __be32 *fw_data;
1823 int i;
1824
1825 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1826 return -EINVAL;
1827
1828 cik_cp_gfx_enable(rdev, false);
1829
1830 /* PFP */
1831 fw_data = (const __be32 *)rdev->pfp_fw->data;
1832 WREG32(CP_PFP_UCODE_ADDR, 0);
1833 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
1834 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1835 WREG32(CP_PFP_UCODE_ADDR, 0);
1836
1837 /* CE */
1838 fw_data = (const __be32 *)rdev->ce_fw->data;
1839 WREG32(CP_CE_UCODE_ADDR, 0);
1840 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
1841 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1842 WREG32(CP_CE_UCODE_ADDR, 0);
1843
1844 /* ME */
1845 fw_data = (const __be32 *)rdev->me_fw->data;
1846 WREG32(CP_ME_RAM_WADDR, 0);
1847 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
1848 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1849 WREG32(CP_ME_RAM_WADDR, 0);
1850
1851 WREG32(CP_PFP_UCODE_ADDR, 0);
1852 WREG32(CP_CE_UCODE_ADDR, 0);
1853 WREG32(CP_ME_RAM_WADDR, 0);
1854 WREG32(CP_ME_RAM_RADDR, 0);
1855 return 0;
1856}
1857
1858/**
1859 * cik_cp_gfx_start - start the gfx ring
1860 *
1861 * @rdev: radeon_device pointer
1862 *
1863 * Enables the ring and loads the clear state context and other
1864 * packets required to init the ring.
1865 * Returns 0 for success, error for failure.
1866 */
1867static int cik_cp_gfx_start(struct radeon_device *rdev)
1868{
1869 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1870 int r, i;
1871
1872 /* init the CP */
1873 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
1874 WREG32(CP_ENDIAN_SWAP, 0);
1875 WREG32(CP_DEVICE_ID, 1);
1876
1877 cik_cp_gfx_enable(rdev, true);
1878
1879 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
1880 if (r) {
1881 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1882 return r;
1883 }
1884
1885 /* init the CE partitions. CE only used for gfx on CIK */
1886 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1887 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1888 radeon_ring_write(ring, 0xc000);
1889 radeon_ring_write(ring, 0xc000);
1890
1891 /* setup clear context state */
1892 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1893 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1894
1895 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1896 radeon_ring_write(ring, 0x80000000);
1897 radeon_ring_write(ring, 0x80000000);
1898
1899 for (i = 0; i < cik_default_size; i++)
1900 radeon_ring_write(ring, cik_default_state[i]);
1901
1902 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1903 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1904
1905 /* set clear context state */
1906 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1907 radeon_ring_write(ring, 0);
1908
1909 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1910 radeon_ring_write(ring, 0x00000316);
1911 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1912 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1913
1914 radeon_ring_unlock_commit(rdev, ring);
1915
1916 return 0;
1917}
1918
1919/**
1920 * cik_cp_gfx_fini - stop the gfx ring
1921 *
1922 * @rdev: radeon_device pointer
1923 *
1924 * Stop the gfx ring and tear down the driver ring
1925 * info.
1926 */
1927static void cik_cp_gfx_fini(struct radeon_device *rdev)
1928{
1929 cik_cp_gfx_enable(rdev, false);
1930 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1931}
1932
1933/**
1934 * cik_cp_gfx_resume - setup the gfx ring buffer registers
1935 *
1936 * @rdev: radeon_device pointer
1937 *
1938 * Program the location and size of the gfx ring buffer
1939 * and test it to make sure it's working.
1940 * Returns 0 for success, error for failure.
1941 */
1942static int cik_cp_gfx_resume(struct radeon_device *rdev)
1943{
1944 struct radeon_ring *ring;
1945 u32 tmp;
1946 u32 rb_bufsz;
1947 u64 rb_addr;
1948 int r;
1949
1950 WREG32(CP_SEM_WAIT_TIMER, 0x0);
1951 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1952
1953 /* Set the write pointer delay */
1954 WREG32(CP_RB_WPTR_DELAY, 0);
1955
1956 /* set the RB to use vmid 0 */
1957 WREG32(CP_RB_VMID, 0);
1958
1959 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1960
1961 /* ring 0 - compute and gfx */
1962 /* Set ring buffer size */
1963 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1964 rb_bufsz = drm_order(ring->ring_size / 8);
1965 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1966#ifdef __BIG_ENDIAN
1967 tmp |= BUF_SWAP_32BIT;
1968#endif
1969 WREG32(CP_RB0_CNTL, tmp);
1970
1971 /* Initialize the ring buffer's read and write pointers */
1972 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
1973 ring->wptr = 0;
1974 WREG32(CP_RB0_WPTR, ring->wptr);
1975
1976	/* set the wb address whether it's enabled or not */
1977 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
1978 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
1979
1980 /* scratch register shadowing is no longer supported */
1981 WREG32(SCRATCH_UMSK, 0);
1982
1983 if (!rdev->wb.enabled)
1984 tmp |= RB_NO_UPDATE;
1985
1986 mdelay(1);
1987 WREG32(CP_RB0_CNTL, tmp);
1988
1989 rb_addr = ring->gpu_addr >> 8;
1990 WREG32(CP_RB0_BASE, rb_addr);
1991 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
1992
1993 ring->rptr = RREG32(CP_RB0_RPTR);
1994
1995 /* start the ring */
1996 cik_cp_gfx_start(rdev);
1997 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
1998 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1999 if (r) {
2000 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2001 return r;
2002 }
2003 return 0;
2004}
2005
2006/**
2007 * cik_cp_compute_enable - enable/disable the compute CP MEs
2008 *
2009 * @rdev: radeon_device pointer
2010 * @enable: enable or disable the MEs
2011 *
2012 * Halts or unhalts the compute MEs.
2013 */
2014static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2015{
2016 if (enable)
2017 WREG32(CP_MEC_CNTL, 0);
2018 else
2019 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2020 udelay(50);
2021}
2022
2023/**
2024 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2025 *
2026 * @rdev: radeon_device pointer
2027 *
2028 * Loads the compute MEC1&2 ucode.
2029 * Returns 0 for success, -EINVAL if the ucode is not available.
2030 */
2031static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2032{
2033 const __be32 *fw_data;
2034 int i;
2035
2036 if (!rdev->mec_fw)
2037 return -EINVAL;
2038
2039 cik_cp_compute_enable(rdev, false);
2040
2041 /* MEC1 */
2042 fw_data = (const __be32 *)rdev->mec_fw->data;
2043 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2044 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2045 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2046 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2047
2048 if (rdev->family == CHIP_KAVERI) {
2049 /* MEC2 */
2050 fw_data = (const __be32 *)rdev->mec_fw->data;
2051 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2052 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2053 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2054 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2055 }
2056
2057 return 0;
2058}
2059
2060/**
2061 * cik_cp_compute_start - start the compute queues
2062 *
2063 * @rdev: radeon_device pointer
2064 *
2065 * Enable the compute queues.
2066 * Returns 0 for success, error for failure.
2067 */
2068static int cik_cp_compute_start(struct radeon_device *rdev)
2069{
2070 //todo
2071 return 0;
2072}
2073
2074/**
2075 * cik_cp_compute_fini - stop the compute queues
2076 *
2077 * @rdev: radeon_device pointer
2078 *
2079 * Stop the compute queues and tear down the driver queue
2080 * info.
2081 */
2082static void cik_cp_compute_fini(struct radeon_device *rdev)
2083{
2084 cik_cp_compute_enable(rdev, false);
2085 //todo
2086}
2087
2088/**
2089 * cik_cp_compute_resume - setup the compute queue registers
2090 *
2091 * @rdev: radeon_device pointer
2092 *
2093 * Program the compute queues and test them to make sure they
2094 * are working.
2095 * Returns 0 for success, error for failure.
2096 */
2097static int cik_cp_compute_resume(struct radeon_device *rdev)
2098{
2099 int r;
2100
2101 //todo
2102 r = cik_cp_compute_start(rdev);
2103 if (r)
2104 return r;
2105 return 0;
2106}
2107
2108/* XXX temporary wrappers to handle both compute and gfx */
2109/* XXX */
2110static void cik_cp_enable(struct radeon_device *rdev, bool enable)
2111{
2112 cik_cp_gfx_enable(rdev, enable);
2113 cik_cp_compute_enable(rdev, enable);
2114}
2115
2116/* XXX */
2117static int cik_cp_load_microcode(struct radeon_device *rdev)
2118{
2119 int r;
2120
2121 r = cik_cp_gfx_load_microcode(rdev);
2122 if (r)
2123 return r;
2124 r = cik_cp_compute_load_microcode(rdev);
2125 if (r)
2126 return r;
2127
2128 return 0;
2129}
2130
2131/* XXX */
2132static void cik_cp_fini(struct radeon_device *rdev)
2133{
2134 cik_cp_gfx_fini(rdev);
2135 cik_cp_compute_fini(rdev);
2136}
2137
2138/* XXX */
2139static int cik_cp_resume(struct radeon_device *rdev)
2140{
2141 int r;
2142
2143 /* Reset all cp blocks */
2144 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
2145 RREG32(GRBM_SOFT_RESET);
2146 mdelay(15);
2147 WREG32(GRBM_SOFT_RESET, 0);
2148 RREG32(GRBM_SOFT_RESET);
2149
2150 r = cik_cp_load_microcode(rdev);
2151 if (r)
2152 return r;
2153
2154 r = cik_cp_gfx_resume(rdev);
2155 if (r)
2156 return r;
2157 r = cik_cp_compute_resume(rdev);
2158 if (r)
2159 return r;
2160
2161 return 0;
2162}
2163
2164/*
2165 * sDMA - System DMA
2166 * Starting with CIK, the GPU has new asynchronous
2167 * DMA engines. These engines are used for compute
2168 * and gfx. There are two DMA engines (SDMA0, SDMA1)
2169 * and each one supports 1 ring buffer used for gfx
2170 * and 2 queues used for compute.
2171 *
2172 * The programming model is very similar to the CP
2173 * (ring buffer, IBs, etc.), but sDMA has its own
2174 * packet format that is different from the PM4 format
2175 * used by the CP. sDMA supports copying data, writing
2176 * embedded data, solid fills, and a number of other
2177 * things. It also has support for tiling/detiling of
2178 * buffers.
2179 */
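/*
 * Illustrative sketch only: a minimal sDMA "write linear" sequence, as
 * used by cik_sdma_ring_test() below, looks like this on the ring:
 *
 *	SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *	dst_addr & 0xfffffffc		- destination, low 32 bits
 *	upper_32_bits(dst_addr)		- destination, high 32 bits
 *	1				- number of DWs to follow
 *	0xDEADBEEF			- payload
 */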
2180/**
2181 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
2182 *
2183 * @rdev: radeon_device pointer
2184 * @ib: IB object to schedule
2185 *
2186 * Schedule an IB in the DMA ring (CIK).
2187 */
2188void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
2189 struct radeon_ib *ib)
2190{
2191 struct radeon_ring *ring = &rdev->ring[ib->ring];
2192 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
2193
2194 if (rdev->wb.enabled) {
2195 u32 next_rptr = ring->wptr + 5;
2196 while ((next_rptr & 7) != 4)
2197 next_rptr++;
2198 next_rptr += 4;
2199 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2200 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2201 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2202 radeon_ring_write(ring, 1); /* number of DWs to follow */
2203 radeon_ring_write(ring, next_rptr);
2204 }
2205
2206	/* IB packet must end on an 8 DW boundary: pad with NOPs until
	 * wptr & 7 == 4 so the 4 DW INDIRECT_BUFFER packet below finishes
	 * the 8 DW block */
2207 while ((ring->wptr & 7) != 4)
2208 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
2209 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
2210 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
2211 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
2212 radeon_ring_write(ring, ib->length_dw);
2213
2214}
2215
2216/**
2217 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
2218 *
2219 * @rdev: radeon_device pointer
2220 * @fence: radeon fence object
2221 *
2222 * Add a DMA fence packet to the ring to write
2223 * the fence seq number and DMA trap packet to generate
2224 * an interrupt if needed (CIK).
2225 */
2226void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
2227 struct radeon_fence *fence)
2228{
2229 struct radeon_ring *ring = &rdev->ring[fence->ring];
2230 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2231 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
2232 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
2233 u32 ref_and_mask;
2234
2235 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
2236 ref_and_mask = SDMA0;
2237 else
2238 ref_and_mask = SDMA1;
2239
2240 /* write the fence */
2241 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
2242 radeon_ring_write(ring, addr & 0xffffffff);
2243 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2244 radeon_ring_write(ring, fence->seq);
2245 /* generate an interrupt */
2246 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
2247 /* flush HDP */
2248 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
2249 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
2250 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
2251 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
2252 radeon_ring_write(ring, ref_and_mask); /* MASK */
2253 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
2254}
2255
2256/**
2257 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
2258 *
2259 * @rdev: radeon_device pointer
2260 * @ring: radeon_ring structure holding ring information
2261 * @semaphore: radeon semaphore object
2262 * @emit_wait: wait or signal semaphore
2263 *
2264 * Add a DMA semaphore packet to the ring wait on or signal
2265 * other rings (CIK).
2266 */
2267void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
2268 struct radeon_ring *ring,
2269 struct radeon_semaphore *semaphore,
2270 bool emit_wait)
2271{
2272 u64 addr = semaphore->gpu_addr;
2273 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
2274
2275 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
2276 radeon_ring_write(ring, addr & 0xfffffff8);
2277 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
2278}
2279
2280/**
2281 * cik_sdma_gfx_stop - stop the gfx async dma engines
2282 *
2283 * @rdev: radeon_device pointer
2284 *
2285 * Stop the gfx async dma ring buffers (CIK).
2286 */
2287static void cik_sdma_gfx_stop(struct radeon_device *rdev)
2288{
2289 u32 rb_cntl, reg_offset;
2290 int i;
2291
2292 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2293
2294 for (i = 0; i < 2; i++) {
2295 if (i == 0)
2296 reg_offset = SDMA0_REGISTER_OFFSET;
2297 else
2298 reg_offset = SDMA1_REGISTER_OFFSET;
2299 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
2300 rb_cntl &= ~SDMA_RB_ENABLE;
2301 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2302 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
2303 }
2304}
2305
2306/**
2307 * cik_sdma_rlc_stop - stop the compute async dma engines
2308 *
2309 * @rdev: radeon_device pointer
2310 *
2311 * Stop the compute async dma queues (CIK).
2312 */
2313static void cik_sdma_rlc_stop(struct radeon_device *rdev)
2314{
2315 /* XXX todo */
2316}
2317
2318/**
2319 * cik_sdma_enable - enable/disable the async dma engines
2320 *
2321 * @rdev: radeon_device pointer
2322 * @enable: enable/disable the DMA MEs.
2323 *
2324 * Halt or unhalt the async dma engines (CIK).
2325 */
2326static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
2327{
2328 u32 me_cntl, reg_offset;
2329 int i;
2330
2331 for (i = 0; i < 2; i++) {
2332 if (i == 0)
2333 reg_offset = SDMA0_REGISTER_OFFSET;
2334 else
2335 reg_offset = SDMA1_REGISTER_OFFSET;
2336 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
2337 if (enable)
2338 me_cntl &= ~SDMA_HALT;
2339 else
2340 me_cntl |= SDMA_HALT;
2341 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
2342 }
2343}
2344
2345/**
2346 * cik_sdma_gfx_resume - setup and start the async dma engines
2347 *
2348 * @rdev: radeon_device pointer
2349 *
2350 * Set up the gfx DMA ring buffers and enable them (CIK).
2351 * Returns 0 for success, error for failure.
2352 */
2353static int cik_sdma_gfx_resume(struct radeon_device *rdev)
2354{
2355 struct radeon_ring *ring;
2356 u32 rb_cntl, ib_cntl;
2357 u32 rb_bufsz;
2358 u32 reg_offset, wb_offset;
2359 int i, r;
2360
2361 for (i = 0; i < 2; i++) {
2362 if (i == 0) {
2363 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2364 reg_offset = SDMA0_REGISTER_OFFSET;
2365 wb_offset = R600_WB_DMA_RPTR_OFFSET;
2366 } else {
2367 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2368 reg_offset = SDMA1_REGISTER_OFFSET;
2369 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2370 }
2371
2372 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2373 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2374
2375 /* Set ring buffer size in dwords */
2376 rb_bufsz = drm_order(ring->ring_size / 4);
2377 rb_cntl = rb_bufsz << 1;
2378#ifdef __BIG_ENDIAN
2379 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
2380#endif
2381 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
2382
2383 /* Initialize the ring buffer's read and write pointers */
2384 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
2385 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
2386
2387 /* set the wb address whether it's enabled or not */
2388 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
2389 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
2390 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
2391 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2392
2393 if (rdev->wb.enabled)
2394 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
2395
2396 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2397 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
2398
2399 ring->wptr = 0;
2400 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
2401
2402 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
2403
2404 /* enable DMA RB */
2405 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
2406
2407 ib_cntl = SDMA_IB_ENABLE;
2408#ifdef __BIG_ENDIAN
2409 ib_cntl |= SDMA_IB_SWAP_ENABLE;
2410#endif
2411 /* enable DMA IBs */
2412 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
2413
2414 ring->ready = true;
2415
2416 r = radeon_ring_test(rdev, ring->idx, ring);
2417 if (r) {
2418 ring->ready = false;
2419 return r;
2420 }
2421 }
2422
2423 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2424
2425 return 0;
2426}
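
/*
 * Worked example (illustrative only): a 64 KiB gfx sDMA ring gives
 * rb_bufsz = drm_order(65536 / 4) = 14, so rb_cntl above starts out as
 * 14 << 1 = 0x1c before the writeback and enable bits are OR'ed in.
 */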
2427
2428/**
2429 * cik_sdma_rlc_resume - setup and start the async dma engines
2430 *
2431 * @rdev: radeon_device pointer
2432 *
2433 * Set up the compute DMA queues and enable them (CIK).
2434 * Returns 0 for success, error for failure.
2435 */
2436static int cik_sdma_rlc_resume(struct radeon_device *rdev)
2437{
2438 /* XXX todo */
2439 return 0;
2440}
2441
2442/**
2443 * cik_sdma_load_microcode - load the sDMA ME ucode
2444 *
2445 * @rdev: radeon_device pointer
2446 *
2447 * Loads the sDMA0/1 ucode.
2448 * Returns 0 for success, -EINVAL if the ucode is not available.
2449 */
2450static int cik_sdma_load_microcode(struct radeon_device *rdev)
2451{
2452 const __be32 *fw_data;
2453 int i;
2454
2455 if (!rdev->sdma_fw)
2456 return -EINVAL;
2457
2458 /* stop the gfx rings and rlc compute queues */
2459 cik_sdma_gfx_stop(rdev);
2460 cik_sdma_rlc_stop(rdev);
2461
2462 /* halt the MEs */
2463 cik_sdma_enable(rdev, false);
2464
2465 /* sdma0 */
2466 fw_data = (const __be32 *)rdev->sdma_fw->data;
2467 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2468 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2469 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2470 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2471
2472 /* sdma1 */
2473 fw_data = (const __be32 *)rdev->sdma_fw->data;
2474 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2475 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
2476 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
2477 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
2478
2479 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
2480 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
2481 return 0;
2482}
2483
2484/**
2485 * cik_sdma_resume - setup and start the async dma engines
2486 *
2487 * @rdev: radeon_device pointer
2488 *
2489 * Set up the DMA engines and enable them (CIK).
2490 * Returns 0 for success, error for failure.
2491 */
2492static int cik_sdma_resume(struct radeon_device *rdev)
2493{
2494 int r;
2495
2496 /* Reset dma */
2497 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
2498 RREG32(SRBM_SOFT_RESET);
2499 udelay(50);
2500 WREG32(SRBM_SOFT_RESET, 0);
2501 RREG32(SRBM_SOFT_RESET);
2502
2503 r = cik_sdma_load_microcode(rdev);
2504 if (r)
2505 return r;
2506
2507 /* unhalt the MEs */
2508 cik_sdma_enable(rdev, true);
2509
2510 /* start the gfx rings and rlc compute queues */
2511 r = cik_sdma_gfx_resume(rdev);
2512 if (r)
2513 return r;
2514 r = cik_sdma_rlc_resume(rdev);
2515 if (r)
2516 return r;
2517
2518 return 0;
2519}
2520
2521/**
2522 * cik_sdma_fini - tear down the async dma engines
2523 *
2524 * @rdev: radeon_device pointer
2525 *
2526 * Stop the async dma engines and free the rings (CIK).
2527 */
2528static void cik_sdma_fini(struct radeon_device *rdev)
2529{
2530 /* stop the gfx rings and rlc compute queues */
2531 cik_sdma_gfx_stop(rdev);
2532 cik_sdma_rlc_stop(rdev);
2533 /* halt the MEs */
2534 cik_sdma_enable(rdev, false);
2535 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2536 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
2537 /* XXX - compute dma queue tear down */
2538}
2539
2540/**
2541 * cik_copy_dma - copy pages using the DMA engine
2542 *
2543 * @rdev: radeon_device pointer
2544 * @src_offset: src GPU address
2545 * @dst_offset: dst GPU address
2546 * @num_gpu_pages: number of GPU pages to xfer
2547 * @fence: radeon fence object
2548 *
2549 * Copy GPU pages using the DMA engine (CIK).
2550 * Used by the radeon ttm implementation to move pages if
2551 * registered as the asic copy callback.
2552 */
2553int cik_copy_dma(struct radeon_device *rdev,
2554 uint64_t src_offset, uint64_t dst_offset,
2555 unsigned num_gpu_pages,
2556 struct radeon_fence **fence)
2557{
2558 struct radeon_semaphore *sem = NULL;
2559 int ring_index = rdev->asic->copy.dma_ring_index;
2560 struct radeon_ring *ring = &rdev->ring[ring_index];
2561 u32 size_in_bytes, cur_size_in_bytes;
2562 int i, num_loops;
2563 int r = 0;
2564
2565 r = radeon_semaphore_create(rdev, &sem);
2566 if (r) {
2567 DRM_ERROR("radeon: moving bo (%d).\n", r);
2568 return r;
2569 }
2570
2571 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
2572 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
2573 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
2574 if (r) {
2575 DRM_ERROR("radeon: moving bo (%d).\n", r);
2576 radeon_semaphore_free(rdev, &sem, NULL);
2577 return r;
2578 }
2579
2580 if (radeon_fence_need_sync(*fence, ring->idx)) {
2581 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
2582 ring->idx);
2583 radeon_fence_note_sync(*fence, ring->idx);
2584 } else {
2585 radeon_semaphore_free(rdev, &sem, NULL);
2586 }
2587
2588 for (i = 0; i < num_loops; i++) {
2589 cur_size_in_bytes = size_in_bytes;
2590 if (cur_size_in_bytes > 0x1fffff)
2591 cur_size_in_bytes = 0x1fffff;
2592 size_in_bytes -= cur_size_in_bytes;
2593 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
2594 radeon_ring_write(ring, cur_size_in_bytes);
2595 radeon_ring_write(ring, 0); /* src/dst endian swap */
2596 radeon_ring_write(ring, src_offset & 0xffffffff);
2597 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
2598 radeon_ring_write(ring, dst_offset & 0xfffffffc);
2599 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
2600 src_offset += cur_size_in_bytes;
2601 dst_offset += cur_size_in_bytes;
2602 }
2603
2604 r = radeon_fence_emit(rdev, fence, ring->idx);
2605 if (r) {
2606 radeon_ring_unlock_undo(rdev, ring);
2607 return r;
2608 }
2609
2610 radeon_ring_unlock_commit(rdev, ring);
2611 radeon_semaphore_free(rdev, &sem, *fence);
2612
2613 return r;
2614}
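
/*
 * Worked example (illustrative only): copying 16 MiB with cik_copy_dma()
 * gives size_in_bytes = 16777216; with the 0x1fffff byte cap per COPY
 * packet, num_loops = DIV_ROUND_UP(16777216, 0x1fffff) = 9, so the ring
 * is locked for 9 * 7 + 14 = 77 DWs: nine 7 DW COPY packets plus room
 * for the optional semaphore sync and the fence.
 */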
2615
2616/**
2617 * cik_sdma_ring_test - simple async dma engine test
2618 *
2619 * @rdev: radeon_device pointer
2620 * @ring: radeon_ring structure holding ring information
2621 *
2622 * Test the DMA engine by using it to write a value to
2623 * memory (CIK).
2624 * Returns 0 for success, error for failure.
2625 */
2626int cik_sdma_ring_test(struct radeon_device *rdev,
2627 struct radeon_ring *ring)
2628{
2629 unsigned i;
2630 int r;
2631 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2632 u32 tmp;
2633
2634 if (!ptr) {
2635 DRM_ERROR("invalid vram scratch pointer\n");
2636 return -EINVAL;
2637 }
2638
2639 tmp = 0xCAFEDEAD;
2640 writel(tmp, ptr);
2641
2642 r = radeon_ring_lock(rdev, ring, 4);
2643 if (r) {
2644 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2645 return r;
2646 }
2647 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
2648 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2649 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
2650 radeon_ring_write(ring, 1); /* number of DWs to follow */
2651 radeon_ring_write(ring, 0xDEADBEEF);
2652 radeon_ring_unlock_commit(rdev, ring);
2653
2654 for (i = 0; i < rdev->usec_timeout; i++) {
2655 tmp = readl(ptr);
2656 if (tmp == 0xDEADBEEF)
2657 break;
2658 DRM_UDELAY(1);
2659 }
2660
2661 if (i < rdev->usec_timeout) {
2662 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2663 } else {
2664 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2665 ring->idx, tmp);
2666 r = -EINVAL;
2667 }
2668 return r;
2669}
2670
2671/**
2672 * cik_sdma_ib_test - test an IB on the DMA engine
2673 *
2674 * @rdev: radeon_device pointer
2675 * @ring: radeon_ring structure holding ring information
2676 *
2677 * Test a simple IB in the DMA ring (CIK).
2678 * Returns 0 on success, error on failure.
2679 */
2680int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2681{
2682 struct radeon_ib ib;
2683 unsigned i;
2684 int r;
2685 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2686 u32 tmp = 0;
2687
2688 if (!ptr) {
2689 DRM_ERROR("invalid vram scratch pointer\n");
2690 return -EINVAL;
2691 }
2692
2693 tmp = 0xCAFEDEAD;
2694 writel(tmp, ptr);
2695
2696 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2697 if (r) {
2698 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2699 return r;
2700 }
2701
2702 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2703 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
2704 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
2705 ib.ptr[3] = 1;
2706 ib.ptr[4] = 0xDEADBEEF;
2707 ib.length_dw = 5;
2708
2709 r = radeon_ib_schedule(rdev, &ib, NULL);
2710 if (r) {
2711 radeon_ib_free(rdev, &ib);
2712 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2713 return r;
2714 }
2715 r = radeon_fence_wait(ib.fence, false);
2716	if (r) {
2717		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_ib_free(rdev, &ib);
2718		return r;
2719	}
2720 for (i = 0; i < rdev->usec_timeout; i++) {
2721 tmp = readl(ptr);
2722 if (tmp == 0xDEADBEEF)
2723 break;
2724 DRM_UDELAY(1);
2725 }
2726 if (i < rdev->usec_timeout) {
2727 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2728 } else {
2729 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
2730 r = -EINVAL;
2731 }
2732 radeon_ib_free(rdev, &ib);
2733 return r;
2734}
2735
2736/**
2737 * cik_gpu_is_lockup - check if the 3D engine is locked up
2738 *
2739 * @rdev: radeon_device pointer
2740 * @ring: radeon_ring structure holding ring information
2741 *
2742 * Check if the 3D engine is locked up (CIK).
2743 * Returns true if the engine is locked, false if not.
2744 */
2745bool cik_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2746{
2747 u32 srbm_status, srbm_status2;
2748 u32 grbm_status, grbm_status2;
2749 u32 grbm_status_se0, grbm_status_se1, grbm_status_se2, grbm_status_se3;
2750
2751 srbm_status = RREG32(SRBM_STATUS);
2752 srbm_status2 = RREG32(SRBM_STATUS2);
2753 grbm_status = RREG32(GRBM_STATUS);
2754 grbm_status2 = RREG32(GRBM_STATUS2);
2755 grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2756 grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2757 grbm_status_se2 = RREG32(GRBM_STATUS_SE2);
2758 grbm_status_se3 = RREG32(GRBM_STATUS_SE3);
2759 if (!(grbm_status & GUI_ACTIVE)) {
2760 radeon_ring_lockup_update(ring);
2761 return false;
2762 }
2763 /* force CP activities */
2764 radeon_ring_force_activity(rdev, ring);
2765 return radeon_ring_test_lockup(rdev, ring);
2766}
2767
2768/**
2769 * cik_gfx_gpu_soft_reset - soft reset the 3D engine and CPG
2770 *
2771 * @rdev: radeon_device pointer
2772 *
2773 * Soft reset the GFX engine and CPG blocks (CIK).
2774 * XXX: deal with resetting RLC and CPF
2775 * Returns 0 for success.
2776 */
2777static int cik_gfx_gpu_soft_reset(struct radeon_device *rdev)
2778{
2779 struct evergreen_mc_save save;
2780 u32 grbm_reset = 0;
2781
2782 if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2783 return 0;
2784
2785 dev_info(rdev->dev, "GPU GFX softreset \n");
2786 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2787 RREG32(GRBM_STATUS));
2788 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2789 RREG32(GRBM_STATUS2));
2790 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2791 RREG32(GRBM_STATUS_SE0));
2792 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2793 RREG32(GRBM_STATUS_SE1));
2794 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2795 RREG32(GRBM_STATUS_SE2));
2796 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2797 RREG32(GRBM_STATUS_SE3));
2798 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2799 RREG32(SRBM_STATUS));
2800 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2801 RREG32(SRBM_STATUS2));
2802 evergreen_mc_stop(rdev, &save);
2803 if (radeon_mc_wait_for_idle(rdev)) {
2804 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2805 }
2806 /* Disable CP parsing/prefetching */
2807 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2808
2809 /* reset all the gfx block and all CPG blocks */
2810 grbm_reset = SOFT_RESET_CPG | SOFT_RESET_GFX;
2811
2812 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2813 WREG32(GRBM_SOFT_RESET, grbm_reset);
2814 (void)RREG32(GRBM_SOFT_RESET);
2815 udelay(50);
2816 WREG32(GRBM_SOFT_RESET, 0);
2817 (void)RREG32(GRBM_SOFT_RESET);
2818 /* Wait a little for things to settle down */
2819 udelay(50);
2820 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2821 RREG32(GRBM_STATUS));
2822 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2823 RREG32(GRBM_STATUS2));
2824 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2825 RREG32(GRBM_STATUS_SE0));
2826 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2827 RREG32(GRBM_STATUS_SE1));
2828 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2829 RREG32(GRBM_STATUS_SE2));
2830 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2831 RREG32(GRBM_STATUS_SE3));
2832 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2833 RREG32(SRBM_STATUS));
2834 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2835 RREG32(SRBM_STATUS2));
2836 evergreen_mc_resume(rdev, &save);
2837 return 0;
2838}
2839
2840/**
2841 * cik_compute_gpu_soft_reset - soft reset CPC
2842 *
2843 * @rdev: radeon_device pointer
2844 *
2845 * Soft reset the CPC blocks (CIK).
2846 * XXX: deal with resetting RLC and CPF
2847 * Returns 0 for success.
2848 */
2849static int cik_compute_gpu_soft_reset(struct radeon_device *rdev)
2850{
2851 struct evergreen_mc_save save;
2852 u32 grbm_reset = 0;
2853
2854 dev_info(rdev->dev, "GPU compute softreset \n");
2855 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2856 RREG32(GRBM_STATUS));
2857 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2858 RREG32(GRBM_STATUS2));
2859 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2860 RREG32(GRBM_STATUS_SE0));
2861 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2862 RREG32(GRBM_STATUS_SE1));
2863 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2864 RREG32(GRBM_STATUS_SE2));
2865 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2866 RREG32(GRBM_STATUS_SE3));
2867 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2868 RREG32(SRBM_STATUS));
2869 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2870 RREG32(SRBM_STATUS2));
2871 evergreen_mc_stop(rdev, &save);
2872 if (radeon_mc_wait_for_idle(rdev)) {
2873 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2874 }
2875 /* Disable CP parsing/prefetching */
2876 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
2877
2878 /* reset all the CPC blocks */
2879 grbm_reset = SOFT_RESET_CPG;
2880
2881 dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2882 WREG32(GRBM_SOFT_RESET, grbm_reset);
2883 (void)RREG32(GRBM_SOFT_RESET);
2884 udelay(50);
2885 WREG32(GRBM_SOFT_RESET, 0);
2886 (void)RREG32(GRBM_SOFT_RESET);
2887 /* Wait a little for things to settle down */
2888 udelay(50);
2889 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2890 RREG32(GRBM_STATUS));
2891 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2892 RREG32(GRBM_STATUS2));
2893 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2894 RREG32(GRBM_STATUS_SE0));
2895 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2896 RREG32(GRBM_STATUS_SE1));
2897 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
2898 RREG32(GRBM_STATUS_SE2));
2899 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
2900 RREG32(GRBM_STATUS_SE3));
2901 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2902 RREG32(SRBM_STATUS));
2903 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
2904 RREG32(SRBM_STATUS2));
2905 evergreen_mc_resume(rdev, &save);
2906 return 0;
2907}
2908
2909/**
2910 * cik_asic_reset - soft reset compute and gfx
2911 *
2912 * @rdev: radeon_device pointer
2913 *
2914 * Soft reset the compute and gfx blocks (CIK).
2915 * XXX: make this more fine grained and only reset
2916 * what is necessary.
2917 * Returns 0 for success.
2918 */
2919int cik_asic_reset(struct radeon_device *rdev)
2920{
2921 int r;
2922
2923 r = cik_compute_gpu_soft_reset(rdev);
2924 if (r)
2925 dev_info(rdev->dev, "Compute reset failed!\n");
2926
2927 return cik_gfx_gpu_soft_reset(rdev);
2928}
2929
2930/**
2931 * cik_sdma_is_lockup - Check if the DMA engine is locked up
2932 *
2933 * @rdev: radeon_device pointer
2934 * @ring: radeon_ring structure holding ring information
2935 *
2936 * Check if the async DMA engine is locked up (CIK).
2937 * Returns true if the engine appears to be locked up, false if not.
2938 */
2939bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2940{
2941 u32 dma_status_reg;
2942
2943 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
2944 dma_status_reg = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
2945 else
2946 dma_status_reg = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
2947 if (dma_status_reg & SDMA_IDLE) {
2948 radeon_ring_lockup_update(ring);
2949 return false;
2950 }
2951 /* force ring activities */
2952 radeon_ring_force_activity(rdev, ring);
2953 return radeon_ring_test_lockup(rdev, ring);
2954}
2955
2956/* MC */
2957/**
2958 * cik_mc_program - program the GPU memory controller
2959 *
2960 * @rdev: radeon_device pointer
2961 *
2962 * Set the location of vram, gart, and AGP in the GPU's
2963 * physical address space (CIK).
2964 */
2965static void cik_mc_program(struct radeon_device *rdev)
2966{
2967 struct evergreen_mc_save save;
2968 u32 tmp;
2969 int i, j;
2970
2971 /* Initialize HDP */
2972 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2973 WREG32((0x2c14 + j), 0x00000000);
2974 WREG32((0x2c18 + j), 0x00000000);
2975 WREG32((0x2c1c + j), 0x00000000);
2976 WREG32((0x2c20 + j), 0x00000000);
2977 WREG32((0x2c24 + j), 0x00000000);
2978 }
2979 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2980
2981 evergreen_mc_stop(rdev, &save);
2982 if (radeon_mc_wait_for_idle(rdev)) {
2983 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2984 }
2985 /* Lockout access through VGA aperture*/
2986 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2987 /* Update configuration */
2988 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2989 rdev->mc.vram_start >> 12);
2990 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2991 rdev->mc.vram_end >> 12);
2992 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2993 rdev->vram_scratch.gpu_addr >> 12);
2994 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2995 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2996 WREG32(MC_VM_FB_LOCATION, tmp);
2997 /* XXX double check these! */
2998 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2999 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3000 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3001 WREG32(MC_VM_AGP_BASE, 0);
3002 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3003 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3004 if (radeon_mc_wait_for_idle(rdev)) {
3005 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3006 }
3007 evergreen_mc_resume(rdev, &save);
3008 /* we need to own VRAM, so turn off the VGA renderer here
3009 * to stop it overwriting our objects */
3010 rv515_vga_render_disable(rdev);
3011}
3012
3013/**
3014 * cik_mc_init - initialize the memory controller driver params
3015 *
3016 * @rdev: radeon_device pointer
3017 *
3018 * Look up the amount of vram, vram width, and decide how to place
3019 * vram and gart within the GPU's physical address space (CIK).
3020 * Returns 0 for success.
3021 */
3022static int cik_mc_init(struct radeon_device *rdev)
3023{
3024 u32 tmp;
3025 int chansize, numchan;
3026
3027	/* Get VRAM information */
3028 rdev->mc.vram_is_ddr = true;
3029 tmp = RREG32(MC_ARB_RAMCFG);
3030 if (tmp & CHANSIZE_MASK) {
3031 chansize = 64;
3032 } else {
3033 chansize = 32;
3034 }
3035 tmp = RREG32(MC_SHARED_CHMAP);
3036 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3037 case 0:
3038 default:
3039 numchan = 1;
3040 break;
3041 case 1:
3042 numchan = 2;
3043 break;
3044 case 2:
3045 numchan = 4;
3046 break;
3047 case 3:
3048 numchan = 8;
3049 break;
3050 case 4:
3051 numchan = 3;
3052 break;
3053 case 5:
3054 numchan = 6;
3055 break;
3056 case 6:
3057 numchan = 10;
3058 break;
3059 case 7:
3060 numchan = 12;
3061 break;
3062 case 8:
3063 numchan = 16;
3064 break;
3065 }
3066 rdev->mc.vram_width = numchan * chansize;
3067 /* Could aper size report 0 ? */
3068 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3069 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3070	/* size in MB on CIK */
3071 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3072 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3073 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3074 si_vram_gtt_location(rdev, &rdev->mc);
3075 radeon_update_bandwidth_info(rdev);
3076
3077 return 0;
3078}
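
/*
 * Worked example (illustrative only): a board with CHANSIZE set in
 * MC_ARB_RAMCFG (chansize = 64) and NOOFCHAN = 2 in MC_SHARED_CHMAP
 * (numchan = 4) ends up with rdev->mc.vram_width = 4 * 64 = 256 bits.
 */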
3079
3080/*
3081 * GART
3082 * VMID 0 is the physical GPU addresses as used by the kernel.
3083 * VMIDs 1-15 are used for userspace clients and are handled
3084 * by the radeon vm/hsa code.
3085 */
3086/**
3087 * cik_pcie_gart_tlb_flush - gart tlb flush callback
3088 *
3089 * @rdev: radeon_device pointer
3090 *
3091 * Flush the TLB for the VMID 0 page table (CIK).
3092 */
3093void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
3094{
3095 /* flush hdp cache */
3096 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
3097
3098 /* bits 0-15 are the VM contexts0-15 */
3099 WREG32(VM_INVALIDATE_REQUEST, 0x1);
3100}
3101
3102/**
3103 * cik_pcie_gart_enable - gart enable
3104 *
3105 * @rdev: radeon_device pointer
3106 *
3107 * This sets up the TLBs, programs the page tables for VMID0,
3108 * sets up the hw for VMIDs 1-15 which are allocated on
3109 * demand, and sets up the global locations for the LDS, GDS,
3110 * and GPUVM for FSA64 clients (CIK).
3111 * Returns 0 for success, errors for failure.
3112 */
3113static int cik_pcie_gart_enable(struct radeon_device *rdev)
3114{
3115 int r, i;
3116
3117 if (rdev->gart.robj == NULL) {
3118 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3119 return -EINVAL;
3120 }
3121 r = radeon_gart_table_vram_pin(rdev);
3122 if (r)
3123 return r;
3124 radeon_gart_restore(rdev);
3125 /* Setup TLB control */
3126 WREG32(MC_VM_MX_L1_TLB_CNTL,
3127 (0xA << 7) |
3128 ENABLE_L1_TLB |
3129 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3130 ENABLE_ADVANCED_DRIVER_MODEL |
3131 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3132 /* Setup L2 cache */
3133 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3134 ENABLE_L2_FRAGMENT_PROCESSING |
3135 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3136 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3137 EFFECTIVE_L2_QUEUE_SIZE(7) |
3138 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3139 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3140 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3141 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3142 /* setup context0 */
3143 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3144 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3145 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3146 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3147 (u32)(rdev->dummy_page.addr >> 12));
3148 WREG32(VM_CONTEXT0_CNTL2, 0);
3149 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3150 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3151
3152 WREG32(0x15D4, 0);
3153 WREG32(0x15D8, 0);
3154 WREG32(0x15DC, 0);
3155
3156 /* empty context1-15 */
3157 /* FIXME start with 4G, once using 2 level pt switch to full
3158 * vm size space
3159 */
3160 /* set vm size, must be a multiple of 4 */
3161 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3162 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3163 for (i = 1; i < 16; i++) {
3164 if (i < 8)
3165 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3166 rdev->gart.table_addr >> 12);
3167 else
3168 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3169 rdev->gart.table_addr >> 12);
3170 }
3171
3172 /* enable context1-15 */
3173 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3174 (u32)(rdev->dummy_page.addr >> 12));
3175	WREG32(VM_CONTEXT1_CNTL2, 4);
3176	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3177				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3178 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3179 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3180 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3181 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3182 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3183 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3184 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3185 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3186 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3187 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3188 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3189
3190 /* TC cache setup ??? */
3191 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
3192 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
3193 WREG32(TC_CFG_L1_STORE_POLICY, 0);
3194
3195 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
3196 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
3197 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
3198 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
3199 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
3200
3201 WREG32(TC_CFG_L1_VOLATILE, 0);
3202 WREG32(TC_CFG_L2_VOLATILE, 0);
3203
3204 if (rdev->family == CHIP_KAVERI) {
3205 u32 tmp = RREG32(CHUB_CONTROL);
3206 tmp &= ~BYPASS_VM;
3207 WREG32(CHUB_CONTROL, tmp);
3208 }
3209
3210 /* XXX SH_MEM regs */
3211 /* where to put LDS, scratch, GPUVM in FSA64 space */
3212 for (i = 0; i < 16; i++) {
3213 WREG32(SRBM_GFX_CNTL, VMID(i));
3214		/* CP and shaders */
3215		WREG32(SH_MEM_CONFIG, 0);
3216 WREG32(SH_MEM_APE1_BASE, 1);
3217 WREG32(SH_MEM_APE1_LIMIT, 0);
3218 WREG32(SH_MEM_BASES, 0);
3219		/* SDMA GFX */
3220 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
3221 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
3222 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
3223 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
3224 /* XXX SDMA RLC - todo */
3225	}
3226 WREG32(SRBM_GFX_CNTL, 0);
3227
3228 cik_pcie_gart_tlb_flush(rdev);
3229 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3230 (unsigned)(rdev->mc.gtt_size >> 20),
3231 (unsigned long long)rdev->gart.table_addr);
3232 rdev->gart.ready = true;
3233 return 0;
3234}
3235
3236/**
3237 * cik_pcie_gart_disable - gart disable
3238 *
3239 * @rdev: radeon_device pointer
3240 *
3241 * This disables all VM page tables (CIK).
3242 */
3243static void cik_pcie_gart_disable(struct radeon_device *rdev)
3244{
3245 /* Disable all tables */
3246 WREG32(VM_CONTEXT0_CNTL, 0);
3247 WREG32(VM_CONTEXT1_CNTL, 0);
3248 /* Setup TLB control */
3249 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3250 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3251 /* Setup L2 cache */
3252 WREG32(VM_L2_CNTL,
3253 ENABLE_L2_FRAGMENT_PROCESSING |
3254 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3255 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3256 EFFECTIVE_L2_QUEUE_SIZE(7) |
3257 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3258 WREG32(VM_L2_CNTL2, 0);
3259 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3260 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
3261 radeon_gart_table_vram_unpin(rdev);
3262}
3263
3264/**
3265 * cik_pcie_gart_fini - vm fini callback
3266 *
3267 * @rdev: radeon_device pointer
3268 *
3269 * Tears down the driver GART/VM setup (CIK).
3270 */
3271static void cik_pcie_gart_fini(struct radeon_device *rdev)
3272{
3273 cik_pcie_gart_disable(rdev);
3274 radeon_gart_table_vram_free(rdev);
3275 radeon_gart_fini(rdev);
3276}
3277
3278/* vm parser */
3279/**
3280 * cik_ib_parse - vm ib_parse callback
3281 *
3282 * @rdev: radeon_device pointer
3283 * @ib: indirect buffer pointer
3284 *
3285 * CIK uses hw IB checking so this is a nop (CIK).
3286 */
3287int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3288{
3289 return 0;
3290}
3291
3292/*
3293 * vm
3294 * VMID 0 is the physical GPU addresses as used by the kernel.
3295 * VMIDs 1-15 are used for userspace clients and are handled
3296 * by the radeon vm/hsa code.
3297 */
3298/**
3299 * cik_vm_init - cik vm init callback
3300 *
3301 * @rdev: radeon_device pointer
3302 *
3303 * Inits cik specific vm parameters (number of VMs, base of vram for
3304 * VMIDs 1-15) (CIK).
3305 * Returns 0 for success.
3306 */
3307int cik_vm_init(struct radeon_device *rdev)
3308{
3309 /* number of VMs */
3310 rdev->vm_manager.nvm = 16;
3311 /* base offset of vram pages */
3312 if (rdev->flags & RADEON_IS_IGP) {
3313 u64 tmp = RREG32(MC_VM_FB_OFFSET);
3314 tmp <<= 22;
3315 rdev->vm_manager.vram_base_offset = tmp;
3316 } else
3317 rdev->vm_manager.vram_base_offset = 0;
3318
3319 return 0;
3320}
3321
3322/**
3323 * cik_vm_fini - cik vm fini callback
3324 *
3325 * @rdev: radeon_device pointer
3326 *
3327 * Tear down any asic specific VM setup (CIK).
3328 */
3329void cik_vm_fini(struct radeon_device *rdev)
3330{
3331}
3332
3333/**
3334 * cik_vm_flush - cik vm flush using the CP
3335 *
3336 * @rdev: radeon_device pointer
3337 *
3338 * Update the page table base and flush the VM TLB
3339 * using the CP (CIK).
3340 */
3341void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3342{
3343 struct radeon_ring *ring = &rdev->ring[ridx];
3344
3345 if (vm == NULL)
3346 return;
3347
3348 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3349 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3350 WRITE_DATA_DST_SEL(0)));
3351 if (vm->id < 8) {
3352 radeon_ring_write(ring,
3353 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3354 } else {
3355 radeon_ring_write(ring,
3356 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3357 }
3358 radeon_ring_write(ring, 0);
3359 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3360
3361 /* update SH_MEM_* regs */
3362 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3363 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3364 WRITE_DATA_DST_SEL(0)));
3365 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3366 radeon_ring_write(ring, 0);
3367 radeon_ring_write(ring, VMID(vm->id));
3368
3369 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
3370 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3371 WRITE_DATA_DST_SEL(0)));
3372 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3373 radeon_ring_write(ring, 0);
3374
3375 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
3376 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
3377 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
3378 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
3379
3380 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3381 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3382 WRITE_DATA_DST_SEL(0)));
3383 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3384 radeon_ring_write(ring, 0);
3385 radeon_ring_write(ring, VMID(0));
3386
3387 /* HDP flush */
3388 /* We should be using the WAIT_REG_MEM packet here like in
3389 * cik_fence_ring_emit(), but it causes the CP to hang in this
3390 * context...
3391 */
3392 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3393 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3394 WRITE_DATA_DST_SEL(0)));
3395 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3396 radeon_ring_write(ring, 0);
3397 radeon_ring_write(ring, 0);
3398
3399 /* bits 0-15 are the VM contexts0-15 */
3400 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3401 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3402 WRITE_DATA_DST_SEL(0)));
3403 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3404 radeon_ring_write(ring, 0);
3405 radeon_ring_write(ring, 1 << vm->id);
3406
3407 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3408 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3409 radeon_ring_write(ring, 0x0);
3410}
3411
3412/**
3413 * cik_vm_set_page - update the page tables using CP or sDMA
3414 *
3415 * @rdev: radeon_device pointer
3416 * @ib: indirect buffer to fill with commands
3417 * @pe: addr of the page entry
3418 * @addr: dst addr to write into pe
3419 * @count: number of page entries to update
3420 * @incr: increase next addr by incr bytes
3421 * @flags: access flags
3422 *
3423 * Update the page tables using CP or sDMA (CIK).
3424 */
3425void cik_vm_set_page(struct radeon_device *rdev,
3426 struct radeon_ib *ib,
3427 uint64_t pe,
3428 uint64_t addr, unsigned count,
3429 uint32_t incr, uint32_t flags)
3430{
3431 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3432 uint64_t value;
3433 unsigned ndw;
3434
3435 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3436 /* CP */
3437 while (count) {
3438 ndw = 2 + count * 2;
3439 if (ndw > 0x3FFE)
3440 ndw = 0x3FFE;
3441
3442 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3443 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3444 WRITE_DATA_DST_SEL(1));
3445 ib->ptr[ib->length_dw++] = pe;
3446 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3447 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3448 if (flags & RADEON_VM_PAGE_SYSTEM) {
3449 value = radeon_vm_map_gart(rdev, addr);
3450 value &= 0xFFFFFFFFFFFFF000ULL;
3451 } else if (flags & RADEON_VM_PAGE_VALID) {
3452 value = addr;
3453 } else {
3454 value = 0;
3455 }
3456 addr += incr;
3457 value |= r600_flags;
3458 ib->ptr[ib->length_dw++] = value;
3459 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3460 }
3461 }
3462 } else {
3463 /* DMA */
3464 if (flags & RADEON_VM_PAGE_SYSTEM) {
3465 while (count) {
3466 ndw = count * 2;
3467 if (ndw > 0xFFFFE)
3468 ndw = 0xFFFFE;
3469
3470 /* for non-physically contiguous pages (system) */
3471 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3472 ib->ptr[ib->length_dw++] = pe;
3473 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3474 ib->ptr[ib->length_dw++] = ndw;
3475 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3476 if (flags & RADEON_VM_PAGE_SYSTEM) {
3477 value = radeon_vm_map_gart(rdev, addr);
3478 value &= 0xFFFFFFFFFFFFF000ULL;
3479 } else if (flags & RADEON_VM_PAGE_VALID) {
3480 value = addr;
3481 } else {
3482 value = 0;
3483 }
3484 addr += incr;
3485 value |= r600_flags;
3486 ib->ptr[ib->length_dw++] = value;
3487 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3488 }
3489 }
3490 } else {
3491 while (count) {
3492 ndw = count;
3493 if (ndw > 0x7FFFF)
3494 ndw = 0x7FFFF;
3495
3496 if (flags & RADEON_VM_PAGE_VALID)
3497 value = addr;
3498 else
3499 value = 0;
3500 /* for physically contiguous pages (vram) */
3501 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
3502 ib->ptr[ib->length_dw++] = pe; /* dst addr */
3503 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3504 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3505 ib->ptr[ib->length_dw++] = 0;
3506 ib->ptr[ib->length_dw++] = value; /* value */
3507 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3508 ib->ptr[ib->length_dw++] = incr; /* increment size */
3509 ib->ptr[ib->length_dw++] = 0;
3510 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
3511 pe += ndw * 8;
3512 addr += ndw * incr;
3513 count -= ndw;
3514 }
3515 }
3516 while (ib->length_dw & 0x7)
3517 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
3518 }
3519}
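/* A minimal sketch of how one 8-byte page table entry is packed by the
 * loops above: a page-aligned address is combined with the access flag
 * bits and emitted as two dwords (system pages are first translated
 * through the GART):
 *
 *	value = radeon_vm_map_gart(rdev, addr) & 0xFFFFFFFFFFFFF000ULL;
 *	value |= r600_flags;
 *	ib->ptr[ib->length_dw++] = value;
 *	ib->ptr[ib->length_dw++] = upper_32_bits(value);
 *
 * VRAM pages skip the GART lookup and use the address directly.
 */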
3520
3521/**
3522 * cik_dma_vm_flush - cik vm flush using sDMA
3523 *
3524 * @rdev: radeon_device pointer
3525 *
3526 * Update the page table base and flush the VM TLB
3527 * using sDMA (CIK).
3528 */
3529void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3530{
3531 struct radeon_ring *ring = &rdev->ring[ridx];
3532 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3533 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3534 u32 ref_and_mask;
3535
3536 if (vm == NULL)
3537 return;
3538
3539 if (ridx == R600_RING_TYPE_DMA_INDEX)
3540 ref_and_mask = SDMA0;
3541 else
3542 ref_and_mask = SDMA1;
3543
3544 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3545 if (vm->id < 8) {
3546 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
3547 } else {
3548 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
3549 }
3550 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3551
3552 /* update SH_MEM_* regs */
3553 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3554 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3555 radeon_ring_write(ring, VMID(vm->id));
3556
3557 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3558 radeon_ring_write(ring, SH_MEM_BASES >> 2);
3559 radeon_ring_write(ring, 0);
3560
3561 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3562 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
3563 radeon_ring_write(ring, 0);
3564
3565 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3566 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
3567 radeon_ring_write(ring, 1);
3568
3569 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3570 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
3571 radeon_ring_write(ring, 0);
3572
3573 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3574 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
3575 radeon_ring_write(ring, VMID(0));
3576
3577 /* flush HDP */
3578 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3579 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3580 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3581 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3582 radeon_ring_write(ring, ref_and_mask); /* MASK */
3583 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3584
3585 /* flush TLB */
3586 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
3587 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
3588 radeon_ring_write(ring, 1 << vm->id);
3589}
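/* For reference: the CP-based cik_vm_flush() above programs the same
 * SRBM/VM registers with WRITE_DATA packets, whereas this sDMA variant
 * uses SRBM_WRITE packets and polls GPU_HDP_FLUSH_DONE against
 * GPU_HDP_FLUSH_REQ for the HDP flush.
 */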
3590
3591/*
3592 * RLC
3593 * The RLC is a multi-purpose microengine that handles a
3594 * variety of functions, the most important of which is
3595 * the interrupt controller.
3596 */
3597/**
3598 * cik_rlc_stop - stop the RLC ME
3599 *
3600 * @rdev: radeon_device pointer
3601 *
3602 * Halt the RLC ME (MicroEngine) (CIK).
3603 */
3604static void cik_rlc_stop(struct radeon_device *rdev)
3605{
3606 int i, j, k;
3607 u32 mask, tmp;
3608
3609 tmp = RREG32(CP_INT_CNTL_RING0);
3610 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3611 WREG32(CP_INT_CNTL_RING0, tmp);
3612
3613 RREG32(CB_CGTT_SCLK_CTRL);
3614 RREG32(CB_CGTT_SCLK_CTRL);
3615 RREG32(CB_CGTT_SCLK_CTRL);
3616 RREG32(CB_CGTT_SCLK_CTRL);
3617
3618 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
3619 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
3620
3621 WREG32(RLC_CNTL, 0);
3622
3623 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3624 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3625 cik_select_se_sh(rdev, i, j);
3626 for (k = 0; k < rdev->usec_timeout; k++) {
3627 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
3628 break;
3629 udelay(1);
3630 }
3631 }
3632 }
3633 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3634
3635 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
3636 for (k = 0; k < rdev->usec_timeout; k++) {
3637 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3638 break;
3639 udelay(1);
3640 }
3641}
3642
3643/**
3644 * cik_rlc_start - start the RLC ME
3645 *
3646 * @rdev: radeon_device pointer
3647 *
3648 * Unhalt the RLC ME (MicroEngine) (CIK).
3649 */
3650static void cik_rlc_start(struct radeon_device *rdev)
3651{
3652 u32 tmp;
3653
3654 WREG32(RLC_CNTL, RLC_ENABLE);
3655
3656 tmp = RREG32(CP_INT_CNTL_RING0);
3657 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3658 WREG32(CP_INT_CNTL_RING0, tmp);
3659
3660 udelay(50);
3661}
3662
3663/**
3664 * cik_rlc_resume - setup the RLC hw
3665 *
3666 * @rdev: radeon_device pointer
3667 *
3668 * Initialize the RLC registers, load the ucode,
3669 * and start the RLC (CIK).
3670 * Returns 0 for success, -EINVAL if the ucode is not available.
3671 */
3672static int cik_rlc_resume(struct radeon_device *rdev)
3673{
3674 u32 i, size;
3675 u32 clear_state_info[3];
3676 const __be32 *fw_data;
3677
3678 if (!rdev->rlc_fw)
3679 return -EINVAL;
3680
3681 switch (rdev->family) {
3682 case CHIP_BONAIRE:
3683 default:
3684 size = BONAIRE_RLC_UCODE_SIZE;
3685 break;
3686 case CHIP_KAVERI:
3687 size = KV_RLC_UCODE_SIZE;
3688 break;
3689 case CHIP_KABINI:
3690 size = KB_RLC_UCODE_SIZE;
3691 break;
3692 }
3693
3694 cik_rlc_stop(rdev);
3695
3696 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
3697 RREG32(GRBM_SOFT_RESET);
3698 udelay(50);
3699 WREG32(GRBM_SOFT_RESET, 0);
3700 RREG32(GRBM_SOFT_RESET);
3701 udelay(50);
3702
3703 WREG32(RLC_LB_CNTR_INIT, 0);
3704 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
3705
3706 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3707 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
3708 WREG32(RLC_LB_PARAMS, 0x00600408);
3709 WREG32(RLC_LB_CNTL, 0x80000004);
3710
3711 WREG32(RLC_MC_CNTL, 0);
3712 WREG32(RLC_UCODE_CNTL, 0);
3713
3714 fw_data = (const __be32 *)rdev->rlc_fw->data;
3715 WREG32(RLC_GPM_UCODE_ADDR, 0);
3716 for (i = 0; i < size; i++)
3717 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
3718 WREG32(RLC_GPM_UCODE_ADDR, 0);
3719
3720 /* XXX */
3721 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
3722 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
3723 clear_state_info[2] = 0;//cik_default_size;
3724 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
3725 for (i = 0; i < 3; i++)
3726 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
3727 WREG32(RLC_DRIVER_DMA_STATUS, 0);
3728
3729 cik_rlc_start(rdev);
3730
3731 return 0;
3732}
3733
3734/*
3735 * Interrupts
3736 * Starting with r6xx, interrupts are handled via a ring buffer.
3737 * Ring buffers are areas of GPU accessible memory that the GPU
3738 * writes interrupt vectors into and the host reads vectors out of.
3739 * There is a rptr (read pointer) that determines where the
3740 * host is currently reading, and a wptr (write pointer)
3741 * which determines where the GPU has written. When the
3742 * pointers are equal, the ring is idle. When the GPU
3743 * writes vectors to the ring buffer, it increments the
3744 * wptr. When there is an interrupt, the host then starts
3745 * fetching commands and processing them until the pointers are
3746 * equal again at which point it updates the rptr.
3747 */
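/* A minimal sketch of that host-side loop, as implemented by
 * cik_irq_process() below (each IH vector is 16 bytes, i.e. 4 dwords):
 *
 *	while (rptr != wptr) {
 *		ring_index = rptr / 4;
 *		... decode and handle rdev->ih.ring[ring_index] ...
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */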
3748
3749/**
3750 * cik_enable_interrupts - Enable the interrupt ring buffer
3751 *
3752 * @rdev: radeon_device pointer
3753 *
3754 * Enable the interrupt ring buffer (CIK).
3755 */
3756static void cik_enable_interrupts(struct radeon_device *rdev)
3757{
3758 u32 ih_cntl = RREG32(IH_CNTL);
3759 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3760
3761 ih_cntl |= ENABLE_INTR;
3762 ih_rb_cntl |= IH_RB_ENABLE;
3763 WREG32(IH_CNTL, ih_cntl);
3764 WREG32(IH_RB_CNTL, ih_rb_cntl);
3765 rdev->ih.enabled = true;
3766}
3767
3768/**
3769 * cik_disable_interrupts - Disable the interrupt ring buffer
3770 *
3771 * @rdev: radeon_device pointer
3772 *
3773 * Disable the interrupt ring buffer (CIK).
3774 */
3775static void cik_disable_interrupts(struct radeon_device *rdev)
3776{
3777 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3778 u32 ih_cntl = RREG32(IH_CNTL);
3779
3780 ih_rb_cntl &= ~IH_RB_ENABLE;
3781 ih_cntl &= ~ENABLE_INTR;
3782 WREG32(IH_RB_CNTL, ih_rb_cntl);
3783 WREG32(IH_CNTL, ih_cntl);
3784 /* set rptr, wptr to 0 */
3785 WREG32(IH_RB_RPTR, 0);
3786 WREG32(IH_RB_WPTR, 0);
3787 rdev->ih.enabled = false;
3788 rdev->ih.rptr = 0;
3789}
3790
3791/**
3792 * cik_disable_interrupt_state - Disable all interrupt sources
3793 *
3794 * @rdev: radeon_device pointer
3795 *
3796 * Clear all interrupt enable bits used by the driver (CIK).
3797 */
3798static void cik_disable_interrupt_state(struct radeon_device *rdev)
3799{
3800 u32 tmp;
3801
3802 /* gfx ring */
3803 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3804	/* sdma */
3805 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3806 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3807 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3808 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3809	/* compute queues */
3810 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
3811 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
3812 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
3813 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
3814 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
3815 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
3816 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
3817 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
3818 /* grbm */
3819 WREG32(GRBM_INT_CNTL, 0);
3820 /* vline/vblank, etc. */
3821 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3822 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
3823 if (rdev->num_crtc >= 4) {
3824 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
3825 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
3826 }
3827 if (rdev->num_crtc >= 6) {
3828 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
3829 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
3830 }
3831
3832 /* dac hotplug */
3833 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
3834
3835 /* digital hotplug */
3836 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3837 WREG32(DC_HPD1_INT_CONTROL, tmp);
3838 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3839 WREG32(DC_HPD2_INT_CONTROL, tmp);
3840 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3841 WREG32(DC_HPD3_INT_CONTROL, tmp);
3842 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3843 WREG32(DC_HPD4_INT_CONTROL, tmp);
3844 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3845 WREG32(DC_HPD5_INT_CONTROL, tmp);
3846 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3847 WREG32(DC_HPD6_INT_CONTROL, tmp);
3848
3849}
3850
3851/**
3852 * cik_irq_init - init and enable the interrupt ring
3853 *
3854 * @rdev: radeon_device pointer
3855 *
3856 * Allocate a ring buffer for the interrupt controller,
3857 * enable the RLC, disable interrupts, enable the IH
3858 * ring buffer and enable it (CIK).
3859 * Called at device load and resume.
3860 * Returns 0 for success, errors for failure.
3861 */
3862static int cik_irq_init(struct radeon_device *rdev)
3863{
3864 int ret = 0;
3865 int rb_bufsz;
3866 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3867
3868 /* allocate ring */
3869 ret = r600_ih_ring_alloc(rdev);
3870 if (ret)
3871 return ret;
3872
3873 /* disable irqs */
3874 cik_disable_interrupts(rdev);
3875
3876 /* init rlc */
3877 ret = cik_rlc_resume(rdev);
3878 if (ret) {
3879 r600_ih_ring_fini(rdev);
3880 return ret;
3881 }
3882
3883 /* setup interrupt control */
3884 /* XXX this should actually be a bus address, not an MC address. same on older asics */
3885 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3886 interrupt_cntl = RREG32(INTERRUPT_CNTL);
3887 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3888 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3889 */
3890 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3891 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3892 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3893 WREG32(INTERRUPT_CNTL, interrupt_cntl);
3894
3895 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3896 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
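	/* rb_bufsz is the log2 of the ring size in dwords; e.g. with the
	 * 64 KB IH ring allocated in cik_init(), ring_size / 4 = 16384
	 * dwords and drm_order() returns 14 (4096 16-byte vectors).
	 */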
3897
3898 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3899 IH_WPTR_OVERFLOW_CLEAR |
3900 (rb_bufsz << 1));
3901
3902 if (rdev->wb.enabled)
3903 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3904
3905 /* set the writeback address whether it's enabled or not */
3906 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3907 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
3908
3909 WREG32(IH_RB_CNTL, ih_rb_cntl);
3910
3911 /* set rptr, wptr to 0 */
3912 WREG32(IH_RB_RPTR, 0);
3913 WREG32(IH_RB_WPTR, 0);
3914
3915 /* Default settings for IH_CNTL (disabled at first) */
3916 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3917 /* RPTR_REARM only works if msi's are enabled */
3918 if (rdev->msi_enabled)
3919 ih_cntl |= RPTR_REARM;
3920 WREG32(IH_CNTL, ih_cntl);
3921
3922 /* force the active interrupt state to all disabled */
3923 cik_disable_interrupt_state(rdev);
3924
3925 pci_set_master(rdev->pdev);
3926
3927 /* enable irqs */
3928 cik_enable_interrupts(rdev);
3929
3930 return ret;
3931}
3932
3933/**
3934 * cik_irq_set - enable/disable interrupt sources
3935 *
3936 * @rdev: radeon_device pointer
3937 *
3938 * Enable interrupt sources on the GPU (vblanks, hpd,
3939 * etc.) (CIK).
3940 * Returns 0 for success, errors for failure.
3941 */
3942int cik_irq_set(struct radeon_device *rdev)
3943{
3944 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
3945 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
3946 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3947 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3948 u32 grbm_int_cntl = 0;
3949	u32 dma_cntl, dma_cntl1;
3950
3951 if (!rdev->irq.installed) {
3952 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
3953 return -EINVAL;
3954 }
3955 /* don't enable anything if the ih is disabled */
3956 if (!rdev->ih.enabled) {
3957 cik_disable_interrupts(rdev);
3958 /* force the active interrupt state to all disabled */
3959 cik_disable_interrupt_state(rdev);
3960 return 0;
3961 }
3962
3963 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
3964 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
3965 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
3966 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
3967 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3968 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3969
3970	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3971 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3972
3973	/* enable CP interrupts on all rings */
3974 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3975 DRM_DEBUG("cik_irq_set: sw int gfx\n");
3976 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3977 }
3978 /* TODO: compute queues! */
3979 /* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */
3980
3981	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
3982 DRM_DEBUG("cik_irq_set: sw int dma\n");
3983 dma_cntl |= TRAP_ENABLE;
3984 }
3985
3986 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
3987 DRM_DEBUG("cik_irq_set: sw int dma1\n");
3988 dma_cntl1 |= TRAP_ENABLE;
3989 }
3990
3991	if (rdev->irq.crtc_vblank_int[0] ||
3992 atomic_read(&rdev->irq.pflip[0])) {
3993 DRM_DEBUG("cik_irq_set: vblank 0\n");
3994 crtc1 |= VBLANK_INTERRUPT_MASK;
3995 }
3996 if (rdev->irq.crtc_vblank_int[1] ||
3997 atomic_read(&rdev->irq.pflip[1])) {
3998 DRM_DEBUG("cik_irq_set: vblank 1\n");
3999 crtc2 |= VBLANK_INTERRUPT_MASK;
4000 }
4001 if (rdev->irq.crtc_vblank_int[2] ||
4002 atomic_read(&rdev->irq.pflip[2])) {
4003 DRM_DEBUG("cik_irq_set: vblank 2\n");
4004 crtc3 |= VBLANK_INTERRUPT_MASK;
4005 }
4006 if (rdev->irq.crtc_vblank_int[3] ||
4007 atomic_read(&rdev->irq.pflip[3])) {
4008 DRM_DEBUG("cik_irq_set: vblank 3\n");
4009 crtc4 |= VBLANK_INTERRUPT_MASK;
4010 }
4011 if (rdev->irq.crtc_vblank_int[4] ||
4012 atomic_read(&rdev->irq.pflip[4])) {
4013 DRM_DEBUG("cik_irq_set: vblank 4\n");
4014 crtc5 |= VBLANK_INTERRUPT_MASK;
4015 }
4016 if (rdev->irq.crtc_vblank_int[5] ||
4017 atomic_read(&rdev->irq.pflip[5])) {
4018 DRM_DEBUG("cik_irq_set: vblank 5\n");
4019 crtc6 |= VBLANK_INTERRUPT_MASK;
4020 }
4021 if (rdev->irq.hpd[0]) {
4022 DRM_DEBUG("cik_irq_set: hpd 1\n");
4023 hpd1 |= DC_HPDx_INT_EN;
4024 }
4025 if (rdev->irq.hpd[1]) {
4026 DRM_DEBUG("cik_irq_set: hpd 2\n");
4027 hpd2 |= DC_HPDx_INT_EN;
4028 }
4029 if (rdev->irq.hpd[2]) {
4030 DRM_DEBUG("cik_irq_set: hpd 3\n");
4031 hpd3 |= DC_HPDx_INT_EN;
4032 }
4033 if (rdev->irq.hpd[3]) {
4034 DRM_DEBUG("cik_irq_set: hpd 4\n");
4035 hpd4 |= DC_HPDx_INT_EN;
4036 }
4037 if (rdev->irq.hpd[4]) {
4038 DRM_DEBUG("cik_irq_set: hpd 5\n");
4039 hpd5 |= DC_HPDx_INT_EN;
4040 }
4041 if (rdev->irq.hpd[5]) {
4042 DRM_DEBUG("cik_irq_set: hpd 6\n");
4043 hpd6 |= DC_HPDx_INT_EN;
4044 }
4045
4046 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4047
4048	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
4049 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
4050
4051	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4052
4053 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4054 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4055 if (rdev->num_crtc >= 4) {
4056 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4057 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4058 }
4059 if (rdev->num_crtc >= 6) {
4060 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4061 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4062 }
4063
4064 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4065 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4066 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4067 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4068 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4069 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4070
4071 return 0;
4072}
4073
4074/**
4075 * cik_irq_ack - ack interrupt sources
4076 *
4077 * @rdev: radeon_device pointer
4078 *
4079 * Ack interrupt sources on the GPU (vblanks, hpd,
4080 * etc.) (CIK). Certain interrupt sources are sw
4081 * generated and do not require an explicit ack.
4082 */
4083static inline void cik_irq_ack(struct radeon_device *rdev)
4084{
4085 u32 tmp;
4086
4087 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4088 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4089 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4090 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4091 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4092 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4093 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
4094
4095 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
4096 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4097 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
4098 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4099 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4100 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4101 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4102 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4103
4104 if (rdev->num_crtc >= 4) {
4105 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4106 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4107 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4108 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4109 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4110 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4111 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4112 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4113 }
4114
4115 if (rdev->num_crtc >= 6) {
4116 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4117 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4118 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4119 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4120 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4121 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4122 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4123 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4124 }
4125
4126 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4127 tmp = RREG32(DC_HPD1_INT_CONTROL);
4128 tmp |= DC_HPDx_INT_ACK;
4129 WREG32(DC_HPD1_INT_CONTROL, tmp);
4130 }
4131 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4132 tmp = RREG32(DC_HPD2_INT_CONTROL);
4133 tmp |= DC_HPDx_INT_ACK;
4134 WREG32(DC_HPD2_INT_CONTROL, tmp);
4135 }
4136 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4137 tmp = RREG32(DC_HPD3_INT_CONTROL);
4138 tmp |= DC_HPDx_INT_ACK;
4139 WREG32(DC_HPD3_INT_CONTROL, tmp);
4140 }
4141 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4142 tmp = RREG32(DC_HPD4_INT_CONTROL);
4143 tmp |= DC_HPDx_INT_ACK;
4144 WREG32(DC_HPD4_INT_CONTROL, tmp);
4145 }
4146 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4147 tmp = RREG32(DC_HPD5_INT_CONTROL);
4148 tmp |= DC_HPDx_INT_ACK;
4149 WREG32(DC_HPD5_INT_CONTROL, tmp);
4150 }
4151 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4152		tmp = RREG32(DC_HPD6_INT_CONTROL);
4153 tmp |= DC_HPDx_INT_ACK;
4154 WREG32(DC_HPD6_INT_CONTROL, tmp);
4155 }
4156}
4157
4158/**
4159 * cik_irq_disable - disable interrupts
4160 *
4161 * @rdev: radeon_device pointer
4162 *
4163 * Disable interrupts on the hw (CIK).
4164 */
4165static void cik_irq_disable(struct radeon_device *rdev)
4166{
4167 cik_disable_interrupts(rdev);
4168 /* Wait and acknowledge irq */
4169 mdelay(1);
4170 cik_irq_ack(rdev);
4171 cik_disable_interrupt_state(rdev);
4172}
4173
4174/**
4175 * cik_irq_suspend - disable interrupts for suspend
4176 *
4177 * @rdev: radeon_device pointer
4178 *
4179 * Disable interrupts and stop the RLC (CIK).
4180 * Used for suspend.
4181 */
4182static void cik_irq_suspend(struct radeon_device *rdev)
4183{
4184 cik_irq_disable(rdev);
4185 cik_rlc_stop(rdev);
4186}
4187
4188/**
4189 * cik_irq_fini - tear down interrupt support
4190 *
4191 * @rdev: radeon_device pointer
4192 *
4193 * Disable interrupts on the hw and free the IH ring
4194 * buffer (CIK).
4195 * Used for driver unload.
4196 */
4197static void cik_irq_fini(struct radeon_device *rdev)
4198{
4199 cik_irq_suspend(rdev);
4200 r600_ih_ring_fini(rdev);
4201}
4202
4203/**
4204 * cik_get_ih_wptr - get the IH ring buffer wptr
4205 *
4206 * @rdev: radeon_device pointer
4207 *
4208 * Get the IH ring buffer wptr from either the register
4209 * or the writeback memory buffer (CIK). Also check for
4210 * ring buffer overflow and deal with it.
4211 * Used by cik_irq_process().
4212 * Returns the value of the wptr.
4213 */
4214static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
4215{
4216 u32 wptr, tmp;
4217
4218 if (rdev->wb.enabled)
4219 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4220 else
4221 wptr = RREG32(IH_RB_WPTR);
4222
4223 if (wptr & RB_OVERFLOW) {
4224		/* When a ring buffer overflow happens, start parsing interrupts
4225		 * from the last vector that was not overwritten (wptr + 16).
4226		 * Hopefully this should allow us to catch up.
4227 */
4228 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4229			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
4230 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4231 tmp = RREG32(IH_RB_CNTL);
4232 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4233 WREG32(IH_RB_CNTL, tmp);
4234 }
4235 return (wptr & rdev->ih.ptr_mask);
4236}
4237
4238/* CIK IV Ring
4239 * Each IV ring entry is 128 bits:
4240 * [7:0] - interrupt source id
4241 * [31:8] - reserved
4242 * [59:32] - interrupt source data
4243 * [63:60] - reserved
4244 * [71:64] - RINGID
4245 * CP:
4246 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
4247 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
4248 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
4249 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
4250 * PIPE_ID - ME0 0=3D
4251 * - ME1&2 compute dispatcher (4 pipes each)
4252 * SDMA:
4253 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
4254 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
4255 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
4256 * [79:72] - VMID
4257 * [95:80] - PASID
4258 * [127:96] - reserved
4259 */
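/* A rough sketch of how one 128-bit vector is pulled apart in
 * cik_irq_process() below:
 *
 *	src_id   = le32_to_cpu(rdev->ih.ring[ring_index + 0]) & 0xff;
 *	src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
 */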
4260/**
4261 * cik_irq_process - interrupt handler
4262 *
4263 * @rdev: radeon_device pointer
4264 *
4265 * Interrupt handler (CIK). Walk the IH ring,
4266 * ack interrupts and schedule work to handle
4267 * interrupt events.
4268 * Returns irq process return code.
4269 */
4270int cik_irq_process(struct radeon_device *rdev)
4271{
4272 u32 wptr;
4273 u32 rptr;
4274 u32 src_id, src_data, ring_id;
4275 u8 me_id, pipe_id, queue_id;
4276 u32 ring_index;
4277 bool queue_hotplug = false;
4278 bool queue_reset = false;
4279
4280 if (!rdev->ih.enabled || rdev->shutdown)
4281 return IRQ_NONE;
4282
4283 wptr = cik_get_ih_wptr(rdev);
4284
4285restart_ih:
4286 /* is somebody else already processing irqs? */
4287 if (atomic_xchg(&rdev->ih.lock, 1))
4288 return IRQ_NONE;
4289
4290 rptr = rdev->ih.rptr;
4291 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4292
4293 /* Order reading of wptr vs. reading of IH ring data */
4294 rmb();
4295
4296 /* display interrupts */
4297 cik_irq_ack(rdev);
4298
4299 while (rptr != wptr) {
4300 /* wptr/rptr are in bytes! */
4301 ring_index = rptr / 4;
4302 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4303 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4304 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
4305
4306 switch (src_id) {
4307 case 1: /* D1 vblank/vline */
4308 switch (src_data) {
4309 case 0: /* D1 vblank */
4310 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
4311 if (rdev->irq.crtc_vblank_int[0]) {
4312 drm_handle_vblank(rdev->ddev, 0);
4313 rdev->pm.vblank_sync = true;
4314 wake_up(&rdev->irq.vblank_queue);
4315 }
4316 if (atomic_read(&rdev->irq.pflip[0]))
4317 radeon_crtc_handle_flip(rdev, 0);
4318 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4319 DRM_DEBUG("IH: D1 vblank\n");
4320 }
4321 break;
4322 case 1: /* D1 vline */
4323 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
4324 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4325 DRM_DEBUG("IH: D1 vline\n");
4326 }
4327 break;
4328 default:
4329 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4330 break;
4331 }
4332 break;
4333 case 2: /* D2 vblank/vline */
4334 switch (src_data) {
4335 case 0: /* D2 vblank */
4336 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4337 if (rdev->irq.crtc_vblank_int[1]) {
4338 drm_handle_vblank(rdev->ddev, 1);
4339 rdev->pm.vblank_sync = true;
4340 wake_up(&rdev->irq.vblank_queue);
4341 }
4342 if (atomic_read(&rdev->irq.pflip[1]))
4343 radeon_crtc_handle_flip(rdev, 1);
4344 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4345 DRM_DEBUG("IH: D2 vblank\n");
4346 }
4347 break;
4348 case 1: /* D2 vline */
4349 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4350 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4351 DRM_DEBUG("IH: D2 vline\n");
4352 }
4353 break;
4354 default:
4355 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4356 break;
4357 }
4358 break;
4359 case 3: /* D3 vblank/vline */
4360 switch (src_data) {
4361 case 0: /* D3 vblank */
4362 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4363 if (rdev->irq.crtc_vblank_int[2]) {
4364 drm_handle_vblank(rdev->ddev, 2);
4365 rdev->pm.vblank_sync = true;
4366 wake_up(&rdev->irq.vblank_queue);
4367 }
4368 if (atomic_read(&rdev->irq.pflip[2]))
4369 radeon_crtc_handle_flip(rdev, 2);
4370 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4371 DRM_DEBUG("IH: D3 vblank\n");
4372 }
4373 break;
4374 case 1: /* D3 vline */
4375 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4376 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4377 DRM_DEBUG("IH: D3 vline\n");
4378 }
4379 break;
4380 default:
4381 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4382 break;
4383 }
4384 break;
4385 case 4: /* D4 vblank/vline */
4386 switch (src_data) {
4387 case 0: /* D4 vblank */
4388 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4389 if (rdev->irq.crtc_vblank_int[3]) {
4390 drm_handle_vblank(rdev->ddev, 3);
4391 rdev->pm.vblank_sync = true;
4392 wake_up(&rdev->irq.vblank_queue);
4393 }
4394 if (atomic_read(&rdev->irq.pflip[3]))
4395 radeon_crtc_handle_flip(rdev, 3);
4396 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4397 DRM_DEBUG("IH: D4 vblank\n");
4398 }
4399 break;
4400 case 1: /* D4 vline */
4401 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4402 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4403 DRM_DEBUG("IH: D4 vline\n");
4404 }
4405 break;
4406 default:
4407 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4408 break;
4409 }
4410 break;
4411 case 5: /* D5 vblank/vline */
4412 switch (src_data) {
4413 case 0: /* D5 vblank */
4414 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4415 if (rdev->irq.crtc_vblank_int[4]) {
4416 drm_handle_vblank(rdev->ddev, 4);
4417 rdev->pm.vblank_sync = true;
4418 wake_up(&rdev->irq.vblank_queue);
4419 }
4420 if (atomic_read(&rdev->irq.pflip[4]))
4421 radeon_crtc_handle_flip(rdev, 4);
4422 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4423 DRM_DEBUG("IH: D5 vblank\n");
4424 }
4425 break;
4426 case 1: /* D5 vline */
4427 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4428 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4429 DRM_DEBUG("IH: D5 vline\n");
4430 }
4431 break;
4432 default:
4433 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4434 break;
4435 }
4436 break;
4437 case 6: /* D6 vblank/vline */
4438 switch (src_data) {
4439 case 0: /* D6 vblank */
4440 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4441 if (rdev->irq.crtc_vblank_int[5]) {
4442 drm_handle_vblank(rdev->ddev, 5);
4443 rdev->pm.vblank_sync = true;
4444 wake_up(&rdev->irq.vblank_queue);
4445 }
4446 if (atomic_read(&rdev->irq.pflip[5]))
4447 radeon_crtc_handle_flip(rdev, 5);
4448 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4449 DRM_DEBUG("IH: D6 vblank\n");
4450 }
4451 break;
4452 case 1: /* D6 vline */
4453 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4454 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4455 DRM_DEBUG("IH: D6 vline\n");
4456 }
4457 break;
4458 default:
4459 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4460 break;
4461 }
4462 break;
4463 case 42: /* HPD hotplug */
4464 switch (src_data) {
4465 case 0:
4466 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
4467 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
4468 queue_hotplug = true;
4469 DRM_DEBUG("IH: HPD1\n");
4470 }
4471 break;
4472 case 1:
4473 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
4474 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4475 queue_hotplug = true;
4476 DRM_DEBUG("IH: HPD2\n");
4477 }
4478 break;
4479 case 2:
4480 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4481 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4482 queue_hotplug = true;
4483 DRM_DEBUG("IH: HPD3\n");
4484 }
4485 break;
4486 case 3:
4487 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4488 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4489 queue_hotplug = true;
4490 DRM_DEBUG("IH: HPD4\n");
4491 }
4492 break;
4493 case 4:
4494 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4495 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4496 queue_hotplug = true;
4497 DRM_DEBUG("IH: HPD5\n");
4498 }
4499 break;
4500 case 5:
4501 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4502 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4503 queue_hotplug = true;
4504 DRM_DEBUG("IH: HPD6\n");
4505 }
4506 break;
4507 default:
4508 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4509 break;
4510 }
4511 break;
4512		case 146:
4513 case 147:
4514 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4515 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4516 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4517 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4518 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4519 /* reset addr and status */
4520 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4521 break;
4522		case 176: /* GFX RB CP_INT */
4523 case 177: /* GFX IB CP_INT */
4524 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4525 break;
4526 case 181: /* CP EOP event */
4527 DRM_DEBUG("IH: CP EOP\n");
4528			/* XXX check the bitfield order! */
4529 me_id = (ring_id & 0x60) >> 5;
4530 pipe_id = (ring_id & 0x18) >> 3;
4531 queue_id = (ring_id & 0x7) >> 0;
4532			switch (me_id) {
4533 case 0:
4534 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4535 break;
4536 case 1:
4537 /* XXX compute */
4538 break;
4539 case 2:
4540 /* XXX compute */
4541 break;
4542 }
4543 break;
4544 case 184: /* CP Privileged reg access */
4545 DRM_ERROR("Illegal register access in command stream\n");
4546 /* XXX check the bitfield order! */
4547 me_id = (ring_id & 0x60) >> 5;
4548 pipe_id = (ring_id & 0x18) >> 3;
4549 queue_id = (ring_id & 0x7) >> 0;
4550 switch (me_id) {
4551 case 0:
4552 /* This results in a full GPU reset, but all we need to do is soft
4553 * reset the CP for gfx
4554 */
4555 queue_reset = true;
4556 break;
4557 case 1:
4558 /* XXX compute */
4559 break;
4560 case 2:
4561 /* XXX compute */
4562 break;
4563 }
4564 break;
4565 case 185: /* CP Privileged inst */
4566 DRM_ERROR("Illegal instruction in command stream\n");
4567			/* XXX check the bitfield order! */
4568 me_id = (ring_id & 0x60) >> 5;
4569 pipe_id = (ring_id & 0x18) >> 3;
4570 queue_id = (ring_id & 0x7) >> 0;
4571			switch (me_id) {
4572 case 0:
4573 /* This results in a full GPU reset, but all we need to do is soft
4574 * reset the CP for gfx
4575 */
4576 queue_reset = true;
4577 break;
4578 case 1:
4579 /* XXX compute */
4580 break;
4581 case 2:
4582 /* XXX compute */
4583 break;
4584 }
4585 break;
4586		case 224: /* SDMA trap event */
4587 /* XXX check the bitfield order! */
4588 me_id = (ring_id & 0x3) >> 0;
4589 queue_id = (ring_id & 0xc) >> 2;
4590 DRM_DEBUG("IH: SDMA trap\n");
4591 switch (me_id) {
4592 case 0:
4593 switch (queue_id) {
4594 case 0:
4595 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4596 break;
4597 case 1:
4598 /* XXX compute */
4599 break;
4600 case 2:
4601 /* XXX compute */
4602 break;
4603 }
4604 break;
4605 case 1:
4606 switch (queue_id) {
4607 case 0:
4608 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4609 break;
4610 case 1:
4611 /* XXX compute */
4612 break;
4613 case 2:
4614 /* XXX compute */
4615 break;
4616 }
4617 break;
4618 }
4619 break;
4620 case 241: /* SDMA Privileged inst */
4621 case 247: /* SDMA Privileged inst */
4622 DRM_ERROR("Illegal instruction in SDMA command stream\n");
4623 /* XXX check the bitfield order! */
4624 me_id = (ring_id & 0x3) >> 0;
4625 queue_id = (ring_id & 0xc) >> 2;
4626 switch (me_id) {
4627 case 0:
4628 switch (queue_id) {
4629 case 0:
4630 queue_reset = true;
4631 break;
4632 case 1:
4633 /* XXX compute */
4634 queue_reset = true;
4635 break;
4636 case 2:
4637 /* XXX compute */
4638 queue_reset = true;
4639 break;
4640 }
4641 break;
4642 case 1:
4643 switch (queue_id) {
4644 case 0:
4645 queue_reset = true;
4646 break;
4647 case 1:
4648 /* XXX compute */
4649 queue_reset = true;
4650 break;
4651 case 2:
4652 /* XXX compute */
4653 queue_reset = true;
4654 break;
4655 }
4656 break;
4657 }
4658 break;
4659		case 233: /* GUI IDLE */
4660 DRM_DEBUG("IH: GUI idle\n");
4661 break;
4662 default:
4663 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4664 break;
4665 }
4666
4667 /* wptr/rptr are in bytes! */
4668 rptr += 16;
4669 rptr &= rdev->ih.ptr_mask;
4670 }
4671 if (queue_hotplug)
4672 schedule_work(&rdev->hotplug_work);
4673 if (queue_reset)
4674 schedule_work(&rdev->reset_work);
4675 rdev->ih.rptr = rptr;
4676 WREG32(IH_RB_RPTR, rdev->ih.rptr);
4677 atomic_set(&rdev->ih.lock, 0);
4678
4679 /* make sure wptr hasn't changed while processing */
4680 wptr = cik_get_ih_wptr(rdev);
4681 if (wptr != rptr)
4682 goto restart_ih;
4683
4684 return IRQ_HANDLED;
4685}
4686
4687/*
4688 * startup/shutdown callbacks
4689 */
4690/**
4691 * cik_startup - program the asic to a functional state
4692 *
4693 * @rdev: radeon_device pointer
4694 *
4695 * Programs the asic to a functional state (CIK).
4696 * Called by cik_init() and cik_resume().
4697 * Returns 0 for success, error for failure.
4698 */
4699static int cik_startup(struct radeon_device *rdev)
4700{
4701 struct radeon_ring *ring;
4702 int r;
4703
4704 if (rdev->flags & RADEON_IS_IGP) {
4705 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4706 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
4707 r = cik_init_microcode(rdev);
4708 if (r) {
4709 DRM_ERROR("Failed to load firmware!\n");
4710 return r;
4711 }
4712 }
4713 } else {
4714 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
4715 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
4716 !rdev->mc_fw) {
4717 r = cik_init_microcode(rdev);
4718 if (r) {
4719 DRM_ERROR("Failed to load firmware!\n");
4720 return r;
4721 }
4722 }
4723
4724 r = ci_mc_load_microcode(rdev);
4725 if (r) {
4726 DRM_ERROR("Failed to load MC firmware!\n");
4727 return r;
4728 }
4729 }
4730
4731 r = r600_vram_scratch_init(rdev);
4732 if (r)
4733 return r;
4734
4735 cik_mc_program(rdev);
4736 r = cik_pcie_gart_enable(rdev);
4737 if (r)
4738 return r;
4739 cik_gpu_init(rdev);
4740
4741 /* allocate rlc buffers */
4742 r = si_rlc_init(rdev);
4743 if (r) {
4744 DRM_ERROR("Failed to init rlc BOs!\n");
4745 return r;
4746 }
4747
4748 /* allocate wb buffer */
4749 r = radeon_wb_init(rdev);
4750 if (r)
4751 return r;
4752
4753 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
4754 if (r) {
4755 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
4756 return r;
4757 }
4758
4759 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4760 if (r) {
4761 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4762 return r;
4763 }
4764
4765 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4766 if (r) {
4767 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4768 return r;
4769 }
4770
4771 /* Enable IRQ */
4772 if (!rdev->irq.installed) {
4773 r = radeon_irq_kms_init(rdev);
4774 if (r)
4775 return r;
4776 }
4777
4778 r = cik_irq_init(rdev);
4779 if (r) {
4780 DRM_ERROR("radeon: IH init failed (%d).\n", r);
4781 radeon_irq_kms_fini(rdev);
4782 return r;
4783 }
4784 cik_irq_set(rdev);
4785
4786 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4787 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
4788 CP_RB0_RPTR, CP_RB0_WPTR,
4789 0, 0xfffff, RADEON_CP_PACKET2);
4790 if (r)
4791 return r;
4792
4793 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4794 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4795 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
4796 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
4797 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4798 if (r)
4799 return r;
4800
4801 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4802 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4803 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
4804 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
4805 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4806 if (r)
4807 return r;
4808
4809 r = cik_cp_resume(rdev);
4810 if (r)
4811 return r;
4812
4813 r = cik_sdma_resume(rdev);
4814 if (r)
4815 return r;
4816
4817 r = radeon_ib_pool_init(rdev);
4818 if (r) {
4819 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
4820 return r;
4821 }
4822
4823 r = radeon_vm_manager_init(rdev);
4824 if (r) {
4825 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
4826 return r;
4827 }
4828
4829 return 0;
4830}
4831
4832/**
4833 * cik_resume - resume the asic to a functional state
4834 *
4835 * @rdev: radeon_device pointer
4836 *
4837 * Programs the asic to a functional state (CIK).
4838 * Called at resume.
4839 * Returns 0 for success, error for failure.
4840 */
4841int cik_resume(struct radeon_device *rdev)
4842{
4843 int r;
4844
4845 /* post card */
4846 atom_asic_init(rdev->mode_info.atom_context);
4847
4848 rdev->accel_working = true;
4849 r = cik_startup(rdev);
4850 if (r) {
4851 DRM_ERROR("cik startup failed on resume\n");
4852 rdev->accel_working = false;
4853 return r;
4854 }
4855
4856 return r;
4857
4858}
4859
4860/**
4861 * cik_suspend - suspend the asic
4862 *
4863 * @rdev: radeon_device pointer
4864 *
4865 * Bring the chip into a state suitable for suspend (CIK).
4866 * Called at suspend.
4867 * Returns 0 for success.
4868 */
4869int cik_suspend(struct radeon_device *rdev)
4870{
4871 radeon_vm_manager_fini(rdev);
4872 cik_cp_enable(rdev, false);
4873 cik_sdma_enable(rdev, false);
4874 cik_irq_suspend(rdev);
4875 radeon_wb_disable(rdev);
4876 cik_pcie_gart_disable(rdev);
4877 return 0;
4878}
4879
4880/* Plan is to move initialization into that function and use
4881 * helper functions so that radeon_device_init pretty much
4882 * does nothing more than calling asic specific functions. This
4883 * should also allow us to remove a bunch of callback functions
4884 * like vram_info.
4885 */
4886/**
4887 * cik_init - asic specific driver and hw init
4888 *
4889 * @rdev: radeon_device pointer
4890 *
4891 * Setup asic specific driver variables and program the hw
4892 * to a functional state (CIK).
4893 * Called at driver startup.
4894 * Returns 0 for success, errors for failure.
4895 */
4896int cik_init(struct radeon_device *rdev)
4897{
4898 struct radeon_ring *ring;
4899 int r;
4900
4901 /* Read BIOS */
4902 if (!radeon_get_bios(rdev)) {
4903 if (ASIC_IS_AVIVO(rdev))
4904 return -EINVAL;
4905 }
4906 /* Must be an ATOMBIOS */
4907 if (!rdev->is_atom_bios) {
4908		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
4909 return -EINVAL;
4910 }
4911 r = radeon_atombios_init(rdev);
4912 if (r)
4913 return r;
4914
4915 /* Post card if necessary */
4916 if (!radeon_card_posted(rdev)) {
4917 if (!rdev->bios) {
4918 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
4919 return -EINVAL;
4920 }
4921 DRM_INFO("GPU not posted. posting now...\n");
4922 atom_asic_init(rdev->mode_info.atom_context);
4923 }
4924 /* Initialize scratch registers */
4925 cik_scratch_init(rdev);
4926 /* Initialize surface registers */
4927 radeon_surface_init(rdev);
4928 /* Initialize clocks */
4929 radeon_get_clock_info(rdev->ddev);
4930
4931 /* Fence driver */
4932 r = radeon_fence_driver_init(rdev);
4933 if (r)
4934 return r;
4935
4936 /* initialize memory controller */
4937 r = cik_mc_init(rdev);
4938 if (r)
4939 return r;
4940 /* Memory manager */
4941 r = radeon_bo_init(rdev);
4942 if (r)
4943 return r;
4944
4945 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4946 ring->ring_obj = NULL;
4947 r600_ring_init(rdev, ring, 1024 * 1024);
4948
4949 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4950 ring->ring_obj = NULL;
4951 r600_ring_init(rdev, ring, 256 * 1024);
4952
4953 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4954 ring->ring_obj = NULL;
4955 r600_ring_init(rdev, ring, 256 * 1024);
4956
4957 rdev->ih.ring_obj = NULL;
4958 r600_ih_ring_init(rdev, 64 * 1024);
4959
4960 r = r600_pcie_gart_init(rdev);
4961 if (r)
4962 return r;
4963
4964 rdev->accel_working = true;
4965 r = cik_startup(rdev);
4966 if (r) {
4967 dev_err(rdev->dev, "disabling GPU acceleration\n");
4968 cik_cp_fini(rdev);
4969 cik_sdma_fini(rdev);
4970 cik_irq_fini(rdev);
4971 si_rlc_fini(rdev);
4972 radeon_wb_fini(rdev);
4973 radeon_ib_pool_fini(rdev);
4974 radeon_vm_manager_fini(rdev);
4975 radeon_irq_kms_fini(rdev);
4976 cik_pcie_gart_fini(rdev);
4977 rdev->accel_working = false;
4978 }
4979
4980 /* Don't start up if the MC ucode is missing.
4981 * The default clocks and voltages before the MC ucode
4982	 * is loaded are not sufficient for advanced operations.
4983 */
4984 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
4985 DRM_ERROR("radeon: MC ucode required for NI+.\n");
4986 return -EINVAL;
4987 }
4988
4989 return 0;
4990}
4991
4992/**
4993 * cik_fini - asic specific driver and hw fini
4994 *
4995 * @rdev: radeon_device pointer
4996 *
4997 * Tear down the asic specific driver variables and program the hw
4998 * to an idle state (CIK).
4999 * Called at driver unload.
5000 */
5001void cik_fini(struct radeon_device *rdev)
5002{
5003 cik_cp_fini(rdev);
5004 cik_sdma_fini(rdev);
5005 cik_irq_fini(rdev);
5006 si_rlc_fini(rdev);
5007 radeon_wb_fini(rdev);
5008 radeon_vm_manager_fini(rdev);
5009 radeon_ib_pool_fini(rdev);
5010 radeon_irq_kms_fini(rdev);
5011 cik_pcie_gart_fini(rdev);
5012 r600_vram_scratch_fini(rdev);
5013 radeon_gem_fini(rdev);
5014 radeon_fence_driver_fini(rdev);
5015 radeon_bo_fini(rdev);
5016 radeon_atombios_fini(rdev);
5017 kfree(rdev->bios);
5018 rdev->bios = NULL;
5019}
5020
5021/* display watermark setup */
5022/**
5023 * dce8_line_buffer_adjust - Set up the line buffer
5024 *
5025 * @rdev: radeon_device pointer
5026 * @radeon_crtc: the selected display controller
5027 * @mode: the current display mode on the selected display
5028 * controller
5029 *
5030 * Set up the line buffer allocation for
5031 * the selected display controller (CIK).
5032 * Returns the line buffer size in pixels.
5033 */
5034static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
5035 struct radeon_crtc *radeon_crtc,
5036 struct drm_display_mode *mode)
5037{
5038 u32 tmp;
5039
5040 /*
5041 * Line Buffer Setup
5042	 * There are 6 line buffers, one for each display controller.
5043	 * There are 3 partitions per LB. Select the number of partitions
5044	 * to enable based on the display width. For display widths larger
5045	 * than 4096, you need to use 2 display controllers and combine
5046 * them using the stereo blender.
5047 */
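	/* For example, a 1920 pixel wide mode selects config 2 below and is
	 * granted 2560 * 2 pixels of line buffer, while modes 2560 pixels
	 * and wider fall back to config 0 (4096 * 2 pixels).
	 */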
5048 if (radeon_crtc->base.enabled && mode) {
5049 if (mode->crtc_hdisplay < 1920)
5050 tmp = 1;
5051 else if (mode->crtc_hdisplay < 2560)
5052 tmp = 2;
5053 else if (mode->crtc_hdisplay < 4096)
5054 tmp = 0;
5055 else {
5056 DRM_DEBUG_KMS("Mode too big for LB!\n");
5057 tmp = 0;
5058 }
5059 } else
5060 tmp = 1;
5061
5062 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
5063 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
5064
5065 if (radeon_crtc->base.enabled && mode) {
5066 switch (tmp) {
5067 case 0:
5068 default:
5069 return 4096 * 2;
5070 case 1:
5071 return 1920 * 2;
5072 case 2:
5073 return 2560 * 2;
5074 }
5075 }
5076
5077 /* controller not enabled, so no lb used */
5078 return 0;
5079}
5080
5081/**
5082 * cik_get_number_of_dram_channels - get the number of dram channels
5083 *
5084 * @rdev: radeon_device pointer
5085 *
5086 * Look up the number of video ram channels (CIK).
5087 * Used for display watermark bandwidth calculations
5088 * Returns the number of dram channels
5089 */
5090static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
5091{
5092 u32 tmp = RREG32(MC_SHARED_CHMAP);
5093
5094 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5095 case 0:
5096 default:
5097 return 1;
5098 case 1:
5099 return 2;
5100 case 2:
5101 return 4;
5102 case 3:
5103 return 8;
5104 case 4:
5105 return 3;
5106 case 5:
5107 return 6;
5108 case 6:
5109 return 10;
5110 case 7:
5111 return 12;
5112 case 8:
5113 return 16;
5114 }
5115}
5116
5117struct dce8_wm_params {
5118 u32 dram_channels; /* number of dram channels */
5119 u32 yclk; /* bandwidth per dram data pin in kHz */
5120 u32 sclk; /* engine clock in kHz */
5121 u32 disp_clk; /* display clock in kHz */
5122 u32 src_width; /* viewport width */
5123 u32 active_time; /* active display time in ns */
5124 u32 blank_time; /* blank time in ns */
5125 bool interlaced; /* mode is interlaced */
5126 fixed20_12 vsc; /* vertical scale ratio */
5127 u32 num_heads; /* number of active crtcs */
5128 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
5129 u32 lb_size; /* line buffer allocated to pipe */
5130 u32 vtaps; /* vertical scaler taps */
5131};
5132
5133/**
5134 * dce8_dram_bandwidth - get the dram bandwidth
5135 *
5136 * @wm: watermark calculation data
5137 *
5138 * Calculate the raw dram bandwidth (CIK).
5139 * Used for display watermark bandwidth calculations
5140 * Returns the dram bandwidth in MBytes/s
5141 */
5142static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
5143{
5144 /* Calculate raw DRAM Bandwidth */
5145 fixed20_12 dram_efficiency; /* 0.7 */
5146 fixed20_12 yclk, dram_channels, bandwidth;
5147 fixed20_12 a;
5148
5149 a.full = dfixed_const(1000);
5150 yclk.full = dfixed_const(wm->yclk);
5151 yclk.full = dfixed_div(yclk, a);
5152 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5153 a.full = dfixed_const(10);
5154 dram_efficiency.full = dfixed_const(7);
5155 dram_efficiency.full = dfixed_div(dram_efficiency, a);
5156 bandwidth.full = dfixed_mul(dram_channels, yclk);
5157 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
5158
5159 return dfixed_trunc(bandwidth);
5160}
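/* Rough worked example: with wm->yclk = 1000000 (1 GHz effective) and two
 * dram channels, this yields 1000 MHz * (2 * 4 bytes) * 0.7 = 5600 MBytes/s.
 */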
5161
5162/**
5163 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
5164 *
5165 * @wm: watermark calculation data
5166 *
5167 * Calculate the dram bandwidth used for display (CIK).
5168 * Used for display watermark bandwidth calculations
5169 * Returns the dram bandwidth for display in MBytes/s
5170 */
5171static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5172{
5173 /* Calculate DRAM Bandwidth and the part allocated to display. */
5174 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
5175 fixed20_12 yclk, dram_channels, bandwidth;
5176 fixed20_12 a;
5177
5178 a.full = dfixed_const(1000);
5179 yclk.full = dfixed_const(wm->yclk);
5180 yclk.full = dfixed_div(yclk, a);
5181 dram_channels.full = dfixed_const(wm->dram_channels * 4);
5182 a.full = dfixed_const(10);
5183 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
5184 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
5185 bandwidth.full = dfixed_mul(dram_channels, yclk);
5186 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
5187
5188 return dfixed_trunc(bandwidth);
5189}
5190
5191/**
5192 * dce8_data_return_bandwidth - get the data return bandwidth
5193 *
5194 * @wm: watermark calculation data
5195 *
5196 * Calculate the data return bandwidth used for display (CIK).
5197 * Used for display watermark bandwidth calculations
5198 * Returns the data return bandwidth in MBytes/s
5199 */
5200static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
5201{
5202 /* Calculate the display Data return Bandwidth */
5203 fixed20_12 return_efficiency; /* 0.8 */
5204 fixed20_12 sclk, bandwidth;
5205 fixed20_12 a;
5206
5207 a.full = dfixed_const(1000);
5208 sclk.full = dfixed_const(wm->sclk);
5209 sclk.full = dfixed_div(sclk, a);
5210 a.full = dfixed_const(10);
5211 return_efficiency.full = dfixed_const(8);
5212 return_efficiency.full = dfixed_div(return_efficiency, a);
5213 a.full = dfixed_const(32);
5214 bandwidth.full = dfixed_mul(a, sclk);
5215 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
5216
5217 return dfixed_trunc(bandwidth);
5218}
5219
5220/**
5221 * dce8_dmif_request_bandwidth - get the dmif bandwidth
5222 *
5223 * @wm: watermark calculation data
5224 *
5225 * Calculate the dmif bandwidth used for display (CIK).
5226 * Used for display watermark bandwidth calculations
5227 * Returns the dmif bandwidth in MBytes/s
5228 */
5229static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
5230{
5231 /* Calculate the DMIF Request Bandwidth */
5232 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
5233 fixed20_12 disp_clk, bandwidth;
5234 fixed20_12 a, b;
5235
5236 a.full = dfixed_const(1000);
5237 disp_clk.full = dfixed_const(wm->disp_clk);
5238 disp_clk.full = dfixed_div(disp_clk, a);
5239 a.full = dfixed_const(32);
5240 b.full = dfixed_mul(a, disp_clk);
5241
5242 a.full = dfixed_const(10);
5243 disp_clk_request_efficiency.full = dfixed_const(8);
5244 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
5245
5246 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
5247
5248 return dfixed_trunc(bandwidth);
5249}
5250
5251/**
5252 * dce8_available_bandwidth - get the min available bandwidth
5253 *
5254 * @wm: watermark calculation data
5255 *
5256 * Calculate the min available bandwidth used for display (CIK).
5257 * Used for display watermark bandwidth calculations
5258 * Returns the min available bandwidth in MBytes/s
5259 */
5260static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
5261{
5262	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
5263 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
5264 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
5265 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
5266
5267 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
5268}
5269
5270/**
5271 * dce8_average_bandwidth - get the average available bandwidth
5272 *
5273 * @wm: watermark calculation data
5274 *
5275 * Calculate the average available bandwidth used for display (CIK).
5276 * Used for display watermark bandwidth calculations
5277 * Returns the average available bandwidth in MBytes/s
5278 */
5279static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
5280{
5281 /* Calculate the display mode Average Bandwidth
5282 * DisplayMode should contain the source and destination dimensions,
5283 * timing, etc.
5284 */
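	/* Boils down to:
	 *   bandwidth ~= (src_width * bytes_per_pixel * vsc) /
	 *                ((active_time + blank_time) / 1000)
	 * i.e. bytes fetched per source line, scaled by the vertical
	 * scaling factor, divided by the scaled line time.
	 */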
5285 fixed20_12 bpp;
5286 fixed20_12 line_time;
5287 fixed20_12 src_width;
5288 fixed20_12 bandwidth;
5289 fixed20_12 a;
5290
5291 a.full = dfixed_const(1000);
5292 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
5293 line_time.full = dfixed_div(line_time, a);
5294 bpp.full = dfixed_const(wm->bytes_per_pixel);
5295 src_width.full = dfixed_const(wm->src_width);
5296 bandwidth.full = dfixed_mul(src_width, bpp);
5297 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
5298 bandwidth.full = dfixed_div(bandwidth, line_time);
5299
5300 return dfixed_trunc(bandwidth);
5301}
5302
5303/**
5304 * dce8_latency_watermark - get the latency watermark
5305 *
5306 * @wm: watermark calculation data
5307 *
5308 * Calculate the latency watermark (CIK).
5309 * Used for display watermark bandwidth calculations
5310 * Returns the latency watermark in ns
5311 */
5312static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
5313{
5314 /* First calculate the latency in ns */
5315 u32 mc_latency = 2000; /* 2000 ns. */
5316 u32 available_bandwidth = dce8_available_bandwidth(wm);
5317 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
5318 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
5319 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
5320 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
5321 (wm->num_heads * cursor_line_pair_return_time);
5322 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
5323 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
5324 u32 tmp, dmif_size = 12288;
5325 fixed20_12 a, b, c;
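	/* Outline of the fixed-point math below: the base latency is the
	 * memory latency plus the dc pipe latency plus the time spent
	 * waiting on the other heads' worst-case chunk and cursor-line
	 * returns.  lb_fill_bw is the smallest of the per-head share of
	 * the available bandwidth, a bound derived from the dmif size,
	 * display clock and memory latency, and
	 * (disp_clk / 1000) * bytes_per_pixel.  If refilling a line at
	 * that rate takes longer than the active time, the shortfall is
	 * added on top of the base latency.
	 */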
5326
5327 if (wm->num_heads == 0)
5328 return 0;
5329
5330 a.full = dfixed_const(2);
5331 b.full = dfixed_const(1);
5332 if ((wm->vsc.full > a.full) ||
5333 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
5334 (wm->vtaps >= 5) ||
5335 ((wm->vsc.full >= a.full) && wm->interlaced))
5336 max_src_lines_per_dst_line = 4;
5337 else
5338 max_src_lines_per_dst_line = 2;
5339
5340 a.full = dfixed_const(available_bandwidth);
5341 b.full = dfixed_const(wm->num_heads);
5342 a.full = dfixed_div(a, b);
5343
5344 b.full = dfixed_const(mc_latency + 512);
5345 c.full = dfixed_const(wm->disp_clk);
5346 b.full = dfixed_div(b, c);
5347
5348 c.full = dfixed_const(dmif_size);
5349 b.full = dfixed_div(c, b);
5350
5351 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
5352
5353 b.full = dfixed_const(1000);
5354 c.full = dfixed_const(wm->disp_clk);
5355 b.full = dfixed_div(c, b);
5356 c.full = dfixed_const(wm->bytes_per_pixel);
5357 b.full = dfixed_mul(b, c);
5358
5359 lb_fill_bw = min(tmp, dfixed_trunc(b));
5360
5361 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
5362 b.full = dfixed_const(1000);
5363 c.full = dfixed_const(lb_fill_bw);
5364 b.full = dfixed_div(c, b);
5365 a.full = dfixed_div(a, b);
5366 line_fill_time = dfixed_trunc(a);
5367
5368 if (line_fill_time < wm->active_time)
5369 return latency;
5370 else
5371 return latency + (line_fill_time - wm->active_time);
5372
5373}
5374
5375/**
5376 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
5377 * average bandwidth against the display dram bandwidth
5378 *
5379 * @wm: watermark calculation data
5380 *
5381 * Check if the display average bandwidth fits in the display
5382 * dram bandwidth (CIK).
5383 * Used for display watermark bandwidth calculations
5384 * Returns true if the display fits, false if not.
5385 */
5386static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
5387{
5388 if (dce8_average_bandwidth(wm) <=
5389 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
5390 return true;
5391 else
5392 return false;
5393}
5394
5395/**
5396 * dce8_average_bandwidth_vs_available_bandwidth - check
5397 * average and available bandwidth
5398 *
5399 * @wm: watermark calculation data
5400 *
5401 * Check if the display average bandwidth fits in the display
5402 * available bandwidth (CIK).
5403 * Used for display watermark bandwidth calculations
5404 * Returns true if the display fits, false if not.
5405 */
5406static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
5407{
5408 if (dce8_average_bandwidth(wm) <=
5409 (dce8_available_bandwidth(wm) / wm->num_heads))
5410 return true;
5411 else
5412 return false;
5413}
5414
5415/**
5416 * dce8_check_latency_hiding - check latency hiding
5417 *
5418 * @wm: watermark calculation data
5419 *
5420 * Check latency hiding (CIK).
5421 * Used for display watermark bandwidth calculations
5422 * Returns true if the display fits, false if not.
5423 */
5424static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
5425{
5426 u32 lb_partitions = wm->lb_size / wm->src_width;
5427 u32 line_time = wm->active_time + wm->blank_time;
5428 u32 latency_tolerant_lines;
5429 u32 latency_hiding;
5430 fixed20_12 a;
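	/* The display can tolerate latency for one or two lines' worth of
	 * line time (depending on scaling and how many line buffer
	 * partitions fit) plus the blanking interval; the mode fits if the
	 * latency watermark does not exceed that window.
	 */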
5431
5432 a.full = dfixed_const(1);
5433 if (wm->vsc.full > a.full)
5434 latency_tolerant_lines = 1;
5435 else {
5436 if (lb_partitions <= (wm->vtaps + 1))
5437 latency_tolerant_lines = 1;
5438 else
5439 latency_tolerant_lines = 2;
5440 }
5441
5442 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
5443
5444 if (dce8_latency_watermark(wm) <= latency_hiding)
5445 return true;
5446 else
5447 return false;
5448}
5449
5450/**
5451 * dce8_program_watermarks - program display watermarks
5452 *
5453 * @rdev: radeon_device pointer
5454 * @radeon_crtc: the selected display controller
5455 * @lb_size: line buffer size
5456 * @num_heads: number of display controllers in use
5457 *
5458 * Calculate and program the display watermarks for the
5459 * selected display controller (CIK).
5460 */
5461static void dce8_program_watermarks(struct radeon_device *rdev,
5462 struct radeon_crtc *radeon_crtc,
5463 u32 lb_size, u32 num_heads)
5464{
5465 struct drm_display_mode *mode = &radeon_crtc->base.mode;
5466 struct dce8_wm_params wm;
5467 u32 pixel_period;
5468 u32 line_time = 0;
5469 u32 latency_watermark_a = 0, latency_watermark_b = 0;
5470 u32 tmp, wm_mask;
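	/* Flow: fill a dce8_wm_params block from the current mode and
	 * clocks, derive latency watermarks A and B (B is meant for the
	 * low-clock case; the low-clock values are not plugged in here
	 * yet), then program each watermark set through
	 * DPG_WATERMARK_MASK_CONTROL / DPG_PIPE_LATENCY_CONTROL before
	 * restoring the original mask selection.
	 */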
5471
5472 if (radeon_crtc->base.enabled && num_heads && mode) {
5473 pixel_period = 1000000 / (u32)mode->clock;
5474 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
5475
5476 wm.yclk = rdev->pm.current_mclk * 10;
5477 wm.sclk = rdev->pm.current_sclk * 10;
5478 wm.disp_clk = mode->clock;
5479 wm.src_width = mode->crtc_hdisplay;
5480 wm.active_time = mode->crtc_hdisplay * pixel_period;
5481 wm.blank_time = line_time - wm.active_time;
5482 wm.interlaced = false;
5483 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
5484 wm.interlaced = true;
5485 wm.vsc = radeon_crtc->vsc;
5486 wm.vtaps = 1;
5487 if (radeon_crtc->rmx_type != RMX_OFF)
5488 wm.vtaps = 2;
5489 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
5490 wm.lb_size = lb_size;
5491 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
5492 wm.num_heads = num_heads;
5493
5494 /* set for high clocks */
5495 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
5496 /* set for low clocks */
5497 /* wm.yclk = low clk; wm.sclk = low clk */
5498 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
5499
5500 /* possibly force display priority to high */
5501 /* should really do this at mode validation time... */
5502 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
5503 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
5504 !dce8_check_latency_hiding(&wm) ||
5505 (rdev->disp_priority == 2)) {
5506 DRM_DEBUG_KMS("force priority to high\n");
5507 }
5508 }
5509
5510 /* select wm A */
5511 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5512 tmp = wm_mask;
5513 tmp &= ~LATENCY_WATERMARK_MASK(3);
5514 tmp |= LATENCY_WATERMARK_MASK(1);
5515 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5516 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5517 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
5518 LATENCY_HIGH_WATERMARK(line_time)));
5519 /* select wm B */
5520 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
5521 tmp &= ~LATENCY_WATERMARK_MASK(3);
5522 tmp |= LATENCY_WATERMARK_MASK(2);
5523 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
5524 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
5525 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
5526 LATENCY_HIGH_WATERMARK(line_time)));
5527 /* restore original selection */
5528 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
5529}
5530
5531/**
5532 * dce8_bandwidth_update - program display watermarks
5533 *
5534 * @rdev: radeon_device pointer
5535 *
5536 * Calculate and program the display watermarks and line
5537 * buffer allocation (CIK).
5538 */
5539void dce8_bandwidth_update(struct radeon_device *rdev)
5540{
5541 struct drm_display_mode *mode = NULL;
5542 u32 num_heads = 0, lb_size;
5543 int i;
5544
5545 radeon_update_display_priority(rdev);
5546
5547 for (i = 0; i < rdev->num_crtc; i++) {
5548 if (rdev->mode_info.crtcs[i]->base.enabled)
5549 num_heads++;
5550 }
5551 for (i = 0; i < rdev->num_crtc; i++) {
5552 mode = &rdev->mode_info.crtcs[i]->base.mode;
5553 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
5554 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
5555 }
5556}
Alex Deucher44fa3462012-12-18 22:17:00 -05005557
5558/**
5559 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
5560 *
5561 * @rdev: radeon_device pointer
5562 *
5563 * Fetches a GPU clock counter snapshot (CIK).
5564 * Returns the 64 bit clock counter snapshot.
5565 */
5566uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
5567{
5568 uint64_t clock;
5569
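	/* Latch the counter with RLC_CAPTURE_GPU_CLOCK_COUNT, then read
	 * the two 32-bit halves; the mutex keeps the snapshot consistent
	 * against concurrent readers.
	 */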
5570 mutex_lock(&rdev->gpu_clock_mutex);
5571 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5572 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5573 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5574 mutex_unlock(&rdev->gpu_clock_mutex);
5575 return clock;
5576}
5577