/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

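/*
 * MODULE_FIRMWARE() only records these blob names in the module's
 * .modinfo section so that tools such as modinfo and initramfs
 * generators can discover and bundle them; the actual loading happens
 * at runtime via request_firmware() in cik_init_microcode() below.
 */
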
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
				 struct radeon_ib *ib,
				 uint64_t pe,
				 uint64_t addr, unsigned count,
				 uint32_t incr, uint32_t flags);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

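	/*
	 * CTF_TEMP is a 9-bit field; bit 0x200 flags an out-of-range
	 * (negative) reading, which is reported as 255 rather than being
	 * sign-extended.
	 */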
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

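	/*
	 * 0xC0300E0C has no symbolic define here; it is presumably the SMU
	 * thermal status register on KV/KB, with (temp / 8) - 49 converting
	 * the raw reading to degrees Celsius.
	 */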
	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}
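
/*
 * In both accessors the (void) read-back of PCIE_INDEX posts the index
 * write, making sure it has landed before PCIE_DATA is touched; the
 * final read-back in the write path likewise posts the data write.
 */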

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
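
/*
 * Format note (inferred, not documented here): each entry above packs
 * what looks like an instance/broadcast selector into the upper 16 bits
 * and a register dword offset into the lower 16, usually followed by a
 * 0x00000000 value slot; the bare counts (0x3, 0x5) introduce short runs
 * of entries without value slots.  The table is handed to the RLC via
 * the save/restore buffer set up in sumo_rlc_init(), and its exact
 * interpretation is up to the RLC microcode.  The kalindi list below
 * uses the same layout.
 */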

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}
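
/*
 * Each "golden register" table above is a flat list of
 * {offset, and_mask, or_mask} triplets.  radeon_program_register_sequence()
 * (radeon_device.c) applies them with roughly the following read-modify-
 * write loop; an and_mask of 0xffffffff means the value is written as-is:
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg      = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask  = registers[i + 2];
 *		if (and_mask == 0xffffffff) {
 *			tmp = or_mask;
 *		} else {
 *			tmp = RREG32(reg);
 *			tmp &= ~and_mask;
 *			tmp |= or_mask;
 *		}
 *		WREG32(reg, tmp);
 *	}
 */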

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
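
/*
 * Note: rdev->clock.spll.reference_freq is filled in from the ATOM
 * firmware-info table in the driver's usual 10 kHz units (a 100 MHz
 * reference reads as 10000), so callers of cik_get_xclk() get the same
 * units, halved or quartered when the GPU counter clock or XTALIN
 * divider is active.
 */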

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
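
/*
 * Usage sketch (field names illustrative, not necessarily the driver's):
 * a compute ring that was assigned a doorbell offset at init time kicks
 * the hardware by writing its new write pointer through this aperture,
 * e.g.
 *
 *	ring->wptr = next_wptr;
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 */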

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
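
/*
 * These pairs are (MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA) values;
 * ci_mc_load_microcode() below walks the table and programs them before
 * uploading the MC ucode proper.
 */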

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/*
		 * The sequencer is idle here (RUN_MASK clear), so no
		 * MC_SHARED_BLACKOUT_CNTL save/restore is needed while
		 * reprogramming it.
		 */

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
1715
Alex Deucher8cc1a532013-04-09 12:41:24 -04001716/*
1717 * Core functions
1718 */
1719/**
1720 * cik_tiling_mode_table_init - init the hw tiling table
1721 *
1722 * @rdev: radeon_device pointer
1723 *
1724 * Starting with SI, the tiling setup is done globally in a
1725 * set of 32 tiling modes. Rather than selecting each set of
1726 * parameters per surface as on older asics, we just select
1727 * which index in the tiling table we want to use, and the
1728 * surface uses those parameters (CIK).
1729 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}

/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}
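
/*
 * Illustrative usage (mirrors what cik_setup_rb() below actually does):
 * instanced registers are read by walking the SE/SH indices and then
 * restoring broadcast mode, e.g.:
 *
 *	for (i = 0; i < se_num; i++)
 *		for (j = 0; j < sh_per_se; j++) {
 *			cik_select_se_sh(rdev, i, j);
 *			data = RREG32(CC_RB_BACKEND_DISABLE);
 *		}
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */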

/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}
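
/*
 * Worked example: cik_create_bitmask(4) shifts and ORs four times
 * (0x1 -> 0x3 -> 0x7 -> 0xf) and returns 0xf; the loop is equivalent
 * to (1 << bit_width) - 1 for any bit_width < 32.
 */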

/**
 * cik_get_rb_disabled - compute the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
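
/*
 * Illustrative numbers: as wired up from cik_gpu_init() below, Bonaire
 * calls into this path with se_num = 2, sh_per_se = 1 and max_rb_num =
 * max_backends_per_se = 2, so the mask is cik_create_bitmask(2 / 2 / 1)
 * = 0x1, i.e. one RB disable bit is sampled per SE/SH instance.
 */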

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
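
/*
 * Illustrative walk-through: with se_num = 2, sh_per_se = 1 and no RBs
 * disabled, enabled_rbs = 0x3.  The first SE sees (enabled_rbs & 3) == 3;
 * after the two-bit shift the second SE sees 0 and takes the default
 * arm, so both program PA_SC_RASTER_CONFIG with RASTER_CONFIG_RB_MAP_2.
 */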

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
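	/*
	 * Worked example of the encoding with illustrative field values:
	 * 4 pipes -> 0x2 in bits 3:0, multi-bank DRAM -> 0x1 in bits 7:4,
	 * a pipe interleave field of 0 in bits 11:8 and a row size field
	 * of 1 in bits 15:12 would yield
	 * tile_config = (1 << 12) | (0 << 8) | (1 << 4) | 2 = 0x1012.
	 */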
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}

/*
 * GPU scratch register helper functions.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}

3051 * IB stuff
3052 */
3053/**
3054 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3055 *
3056 * @rdev: radeon_device pointer
3057 * @ib: radeon indirect buffer object
3058 *
3059 * Emits an DE (drawing engine) or CE (constant engine) IB
3060 * on the gfx ring. IBs are usually generated by userspace
3061 * acceleration drivers and submitted to the kernel for
3062 * sheduling on the ring. This function schedules the IB
3063 * on the gfx ring for execution by the GPU.
3064 */
3065void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3066{
3067 struct radeon_ring *ring = &rdev->ring[ib->ring];
3068 u32 header, control = INDIRECT_BUFFER_VALID;
3069
3070 if (ib->is_const_ib) {
3071 /* set switch buffer packet before const IB */
3072 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3073 radeon_ring_write(ring, 0);
3074
3075 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3076 } else {
3077 u32 next_rptr;
3078 if (ring->rptr_save_reg) {
3079 next_rptr = ring->wptr + 3 + 4;
3080 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3081 radeon_ring_write(ring, ((ring->rptr_save_reg -
3082 PACKET3_SET_UCONFIG_REG_START) >> 2));
3083 radeon_ring_write(ring, next_rptr);
3084 } else if (rdev->wb.enabled) {
3085 next_rptr = ring->wptr + 5 + 4;
3086 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3087 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3088 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3089 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3090 radeon_ring_write(ring, next_rptr);
3091 }
3092
3093 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3094 }
3095
3096 control |= ib->length_dw |
3097 (ib->vm ? (ib->vm->id << 24) : 0);
3098
3099 radeon_ring_write(ring, header);
3100 radeon_ring_write(ring,
3101#ifdef __BIG_ENDIAN
3102 (2 << 0) |
3103#endif
3104 (ib->gpu_addr & 0xFFFFFFFC));
3105 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3106 radeon_ring_write(ring, control);
3107}
3108
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
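/*
 * By that arithmetic, a two-MEC asic exposes up to
 * 2 MECs * 4 pipes/MEC * 8 queues/pipe = 64 compute queues,
 * and a single-MEC part tops out at 32 (totals derived from the
 * description above, not from a register read).
 */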
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	udelay(50);
}

/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}

/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}

/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}

Alex Deucher963e81f2013-06-26 17:37:11 -04003408u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3409 struct radeon_ring *ring)
3410{
3411 u32 rptr;
3412
3413
3414
3415 if (rdev->wb.enabled) {
3416 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3417 } else {
Alex Deucherf61d5b462013-08-06 12:40:16 -04003418 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003419 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3420 rptr = RREG32(CP_HQD_PQ_RPTR);
3421 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04003422 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003423 }
Alex Deucher963e81f2013-06-26 17:37:11 -04003424
3425 return rptr;
3426}
3427
3428u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3429 struct radeon_ring *ring)
3430{
3431 u32 wptr;
3432
3433 if (rdev->wb.enabled) {
3434 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3435 } else {
3436		mutex_lock(&rdev->srbm_mutex);
3437		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3438		wptr = RREG32(CP_HQD_PQ_WPTR);
3439		cik_srbm_select(rdev, 0, 0, 0, 0);
3440		mutex_unlock(&rdev->srbm_mutex);
3441	}
3442
3443 return wptr;
3444}
3445
3446void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3447 struct radeon_ring *ring)
3448{
3449	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3450	WDOORBELL32(ring->doorbell_offset, ring->wptr);
3451}
3452
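/*
 * Illustrative sketch (hypothetical helper): rptr_offs/wptr_offs are
 * byte offsets into the write-back page, while rdev->wb.wb is a u32
 * array, hence the /4 indexing used by the three accessors above. The
 * write-back page is little-endian, so every access goes through
 * le32_to_cpu()/cpu_to_le32().
 */
static inline u32 demo_wb_read(const __le32 *wb, u32 byte_offs)
{
	return le32_to_cpu(wb[byte_offs / 4]);
}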
3453/**
3454 * cik_cp_compute_enable - enable/disable the compute CP MEs
3455 *
3456 * @rdev: radeon_device pointer
3457 * @enable: enable or disable the MEs
3458 *
3459 * Halts or unhalts the compute MEs.
3460 */
3461static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3462{
3463 if (enable)
3464 WREG32(CP_MEC_CNTL, 0);
3465 else
3466 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3467 udelay(50);
3468}
3469
3470/**
3471 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3472 *
3473 * @rdev: radeon_device pointer
3474 *
3475 * Loads the compute MEC1&2 ucode.
3476 * Returns 0 for success, -EINVAL if the ucode is not available.
3477 */
3478static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3479{
3480 const __be32 *fw_data;
3481 int i;
3482
3483 if (!rdev->mec_fw)
3484 return -EINVAL;
3485
3486 cik_cp_compute_enable(rdev, false);
3487
3488 /* MEC1 */
3489 fw_data = (const __be32 *)rdev->mec_fw->data;
3490 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3491 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3492 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3493 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3494
3495 if (rdev->family == CHIP_KAVERI) {
3496 /* MEC2 */
3497 fw_data = (const __be32 *)rdev->mec_fw->data;
3498 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3499 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3500 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3501 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3502 }
3503
3504 return 0;
3505}
3506
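/*
 * Illustrative sketch (hypothetical helper): the firmware image is
 * stored big-endian, so be32_to_cpup() in the upload loops above
 * byte-swaps each dword on little-endian hosts before it is written to
 * the ucode data port. The same conversion written out by hand:
 */
static inline u32 demo_fw_dword(const u8 *fw_data, unsigned int idx)
{
	const u8 *p = fw_data + idx * 4;

	return ((u32)p[0] << 24) | ((u32)p[1] << 16) |
	       ((u32)p[2] << 8) | (u32)p[3];
}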
3507/**
3508 * cik_cp_compute_start - start the compute queues
3509 *
3510 * @rdev: radeon_device pointer
3511 *
3512 * Enable the compute queues.
3513 * Returns 0 for success, error for failure.
3514 */
3515static int cik_cp_compute_start(struct radeon_device *rdev)
3516{
3517	cik_cp_compute_enable(rdev, true);
3518
3519	return 0;
3520}
3521
3522/**
3523 * cik_cp_compute_fini - stop the compute queues
3524 *
3525 * @rdev: radeon_device pointer
3526 *
3527 * Stop the compute queues and tear down the driver queue
3528 * info.
3529 */
3530static void cik_cp_compute_fini(struct radeon_device *rdev)
3531{
3532	int i, idx, r;
3533
3534	cik_cp_compute_enable(rdev, false);
3535
3536 for (i = 0; i < 2; i++) {
3537 if (i == 0)
3538 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3539 else
3540 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3541
3542 if (rdev->ring[idx].mqd_obj) {
3543 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3544 if (unlikely(r != 0))
3545 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3546
3547 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3548 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3549
3550 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3551 rdev->ring[idx].mqd_obj = NULL;
3552 }
3553 }
3554}
3555
3556static void cik_mec_fini(struct radeon_device *rdev)
3557{
3558 int r;
3559
3560 if (rdev->mec.hpd_eop_obj) {
3561 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3562 if (unlikely(r != 0))
3563 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3564 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3565 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3566
3567 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3568 rdev->mec.hpd_eop_obj = NULL;
3569 }
3570}
3571
3572#define MEC_HPD_SIZE 2048
3573
3574static int cik_mec_init(struct radeon_device *rdev)
3575{
3576 int r;
3577 u32 *hpd;
3578
3579 /*
3580 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3581 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3582 */
3583 if (rdev->family == CHIP_KAVERI)
3584 rdev->mec.num_mec = 2;
3585 else
3586 rdev->mec.num_mec = 1;
3587 rdev->mec.num_pipe = 4;
3588 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3589
3590 if (rdev->mec.hpd_eop_obj == NULL) {
3591 r = radeon_bo_create(rdev,
3592			     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3593 PAGE_SIZE, true,
3594 RADEON_GEM_DOMAIN_GTT, NULL,
3595 &rdev->mec.hpd_eop_obj);
3596 if (r) {
3597			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3598 return r;
3599 }
3600 }
3601
3602 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3603 if (unlikely(r != 0)) {
3604 cik_mec_fini(rdev);
3605 return r;
3606 }
3607 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3608 &rdev->mec.hpd_eop_gpu_addr);
3609 if (r) {
3610		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3611 cik_mec_fini(rdev);
3612 return r;
3613 }
3614 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3615 if (r) {
3616		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3617 cik_mec_fini(rdev);
3618 return r;
3619 }
3620
3621 /* clear memory. Not sure if this is required or not */
3622	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3623
3624 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3625 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3626
3627 return 0;
3628}
3629
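/*
 * Illustrative sketch (hypothetical helper): the HPD EOP buffer sizing
 * used in cik_mec_init() above. One MEC_HPD_SIZE region per pipe,
 * doubled to match the allocation; Kaveri's 2 MECs with 4 pipes each
 * give 2 * 4 * 2048 * 2 = 32 KB.
 */
static inline u32 demo_hpd_eop_bytes(u32 num_mec, u32 num_pipe)
{
	return num_mec * num_pipe * MEC_HPD_SIZE * 2;
}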
3630struct hqd_registers
3631{
3632 u32 cp_mqd_base_addr;
3633 u32 cp_mqd_base_addr_hi;
3634 u32 cp_hqd_active;
3635 u32 cp_hqd_vmid;
3636 u32 cp_hqd_persistent_state;
3637 u32 cp_hqd_pipe_priority;
3638 u32 cp_hqd_queue_priority;
3639 u32 cp_hqd_quantum;
3640 u32 cp_hqd_pq_base;
3641 u32 cp_hqd_pq_base_hi;
3642 u32 cp_hqd_pq_rptr;
3643 u32 cp_hqd_pq_rptr_report_addr;
3644 u32 cp_hqd_pq_rptr_report_addr_hi;
3645 u32 cp_hqd_pq_wptr_poll_addr;
3646 u32 cp_hqd_pq_wptr_poll_addr_hi;
3647 u32 cp_hqd_pq_doorbell_control;
3648 u32 cp_hqd_pq_wptr;
3649 u32 cp_hqd_pq_control;
3650 u32 cp_hqd_ib_base_addr;
3651 u32 cp_hqd_ib_base_addr_hi;
3652 u32 cp_hqd_ib_rptr;
3653 u32 cp_hqd_ib_control;
3654 u32 cp_hqd_iq_timer;
3655 u32 cp_hqd_iq_rptr;
3656 u32 cp_hqd_dequeue_request;
3657 u32 cp_hqd_dma_offload;
3658 u32 cp_hqd_sema_cmd;
3659 u32 cp_hqd_msg_type;
3660 u32 cp_hqd_atomic0_preop_lo;
3661 u32 cp_hqd_atomic0_preop_hi;
3662 u32 cp_hqd_atomic1_preop_lo;
3663 u32 cp_hqd_atomic1_preop_hi;
3664 u32 cp_hqd_hq_scheduler0;
3665 u32 cp_hqd_hq_scheduler1;
3666 u32 cp_mqd_control;
3667};
3668
3669struct bonaire_mqd
3670{
3671 u32 header;
3672 u32 dispatch_initiator;
3673 u32 dimensions[3];
3674 u32 start_idx[3];
3675 u32 num_threads[3];
3676 u32 pipeline_stat_enable;
3677 u32 perf_counter_enable;
3678 u32 pgm[2];
3679 u32 tba[2];
3680 u32 tma[2];
3681 u32 pgm_rsrc[2];
3682 u32 vmid;
3683 u32 resource_limits;
3684 u32 static_thread_mgmt01[2];
3685 u32 tmp_ring_size;
3686 u32 static_thread_mgmt23[2];
3687 u32 restart[3];
3688 u32 thread_trace_enable;
3689 u32 reserved1;
3690 u32 user_data[16];
3691 u32 vgtcs_invoke_count[2];
3692 struct hqd_registers queue_state;
3693 u32 dequeue_cntr;
3694 u32 interrupt_queue[64];
3695};
3696
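/*
 * Illustrative sanity check (an assumption based only on the layout
 * above): the whole MQD, including the embedded hqd_registers block,
 * must fit in the single page allocated for it in
 * cik_cp_compute_resume() below.
 */
static inline void demo_mqd_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct bonaire_mqd) > PAGE_SIZE);
}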
3697/**
3698 * cik_cp_compute_resume - setup the compute queue registers
3699 *
3700 * @rdev: radeon_device pointer
3701 *
3702 * Program the compute queues and test them to make sure they
3703 * are working.
3704 * Returns 0 for success, error for failure.
3705 */
3706static int cik_cp_compute_resume(struct radeon_device *rdev)
3707{
3708	int r, i, idx;
3709 u32 tmp;
3710 bool use_doorbell = true;
3711 u64 hqd_gpu_addr;
3712 u64 mqd_gpu_addr;
3713 u64 eop_gpu_addr;
3714 u64 wb_gpu_addr;
3715 u32 *buf;
3716 struct bonaire_mqd *mqd;
3717
3718	r = cik_cp_compute_start(rdev);
3719	if (r)
3720		return r;
3721
3722 /* fix up chicken bits */
3723 tmp = RREG32(CP_CPF_DEBUG);
3724 tmp |= (1 << 23);
3725 WREG32(CP_CPF_DEBUG, tmp);
3726
3727 /* init the pipes */
3728	mutex_lock(&rdev->srbm_mutex);
3729	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3730 int me = (i < 4) ? 1 : 2;
3731 int pipe = (i < 4) ? i : (i - 4);
3732
3733 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3734
3735 cik_srbm_select(rdev, me, pipe, 0, 0);
3736
3737 /* write the EOP addr */
3738 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3739 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3740
3741 /* set the VMID assigned */
3742 WREG32(CP_HPD_EOP_VMID, 0);
3743
3744 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3745 tmp = RREG32(CP_HPD_EOP_CONTROL);
3746 tmp &= ~EOP_SIZE_MASK;
3747 tmp |= drm_order(MEC_HPD_SIZE / 8);
3748 WREG32(CP_HPD_EOP_CONTROL, tmp);
3749 }
3750 cik_srbm_select(rdev, 0, 0, 0, 0);
3751	mutex_unlock(&rdev->srbm_mutex);
3752
3753 /* init the queues. Just two for now. */
3754 for (i = 0; i < 2; i++) {
3755 if (i == 0)
3756 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3757 else
3758 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3759
3760 if (rdev->ring[idx].mqd_obj == NULL) {
3761 r = radeon_bo_create(rdev,
3762 sizeof(struct bonaire_mqd),
3763 PAGE_SIZE, true,
3764 RADEON_GEM_DOMAIN_GTT, NULL,
3765 &rdev->ring[idx].mqd_obj);
3766 if (r) {
3767 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3768 return r;
3769 }
3770 }
3771
3772 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3773 if (unlikely(r != 0)) {
3774 cik_cp_compute_fini(rdev);
3775 return r;
3776 }
3777 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3778 &mqd_gpu_addr);
3779 if (r) {
3780 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3781 cik_cp_compute_fini(rdev);
3782 return r;
3783 }
3784 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3785 if (r) {
3786 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3787 cik_cp_compute_fini(rdev);
3788 return r;
3789 }
3790
3791 /* doorbell offset */
3792 rdev->ring[idx].doorbell_offset =
3793 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3794
3795 /* init the mqd struct */
3796 memset(buf, 0, sizeof(struct bonaire_mqd));
3797
3798 mqd = (struct bonaire_mqd *)buf;
3799 mqd->header = 0xC0310800;
3800 mqd->static_thread_mgmt01[0] = 0xffffffff;
3801 mqd->static_thread_mgmt01[1] = 0xffffffff;
3802 mqd->static_thread_mgmt23[0] = 0xffffffff;
3803 mqd->static_thread_mgmt23[1] = 0xffffffff;
3804
3805		mutex_lock(&rdev->srbm_mutex);
3806		cik_srbm_select(rdev, rdev->ring[idx].me,
3807 rdev->ring[idx].pipe,
3808 rdev->ring[idx].queue, 0);
3809
3810 /* disable wptr polling */
3811 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3812 tmp &= ~WPTR_POLL_EN;
3813 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3814
3815 /* enable doorbell? */
3816 mqd->queue_state.cp_hqd_pq_doorbell_control =
3817 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3818 if (use_doorbell)
3819 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3820 else
3821 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3822 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3823 mqd->queue_state.cp_hqd_pq_doorbell_control);
3824
3825 /* disable the queue if it's active */
3826 mqd->queue_state.cp_hqd_dequeue_request = 0;
3827 mqd->queue_state.cp_hqd_pq_rptr = 0;
3828		mqd->queue_state.cp_hqd_pq_wptr = 0;
3829 if (RREG32(CP_HQD_ACTIVE) & 1) {
3830 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3831 for (i = 0; i < rdev->usec_timeout; i++) {
3832 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3833 break;
3834 udelay(1);
3835 }
3836 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3837 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3838 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3839 }
3840
3841 /* set the pointer to the MQD */
3842 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3843 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3844 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3845 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3846 /* set MQD vmid to 0 */
3847 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3848 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3849 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3850
3851		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3852 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3853 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3854 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3855 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3856 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3857
3858 /* set up the HQD, this is similar to CP_RB0_CNTL */
3859 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3860 mqd->queue_state.cp_hqd_pq_control &=
3861 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3862
3863 mqd->queue_state.cp_hqd_pq_control |=
3864 drm_order(rdev->ring[idx].ring_size / 8);
3865 mqd->queue_state.cp_hqd_pq_control |=
3866 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3867#ifdef __BIG_ENDIAN
3868 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3869#endif
3870 mqd->queue_state.cp_hqd_pq_control &=
3871 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3872 mqd->queue_state.cp_hqd_pq_control |=
3873 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3874 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3875
3876 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3877 if (i == 0)
3878 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3879 else
3880 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3881 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3882 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3883 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3884 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3885 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3886
3887		/* set the wb address whether it's enabled or not */
3888 if (i == 0)
3889 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3890 else
3891 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3892 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3893 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3894 upper_32_bits(wb_gpu_addr) & 0xffff;
3895 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3896 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3897 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3898 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3899
3900 /* enable the doorbell if requested */
3901 if (use_doorbell) {
3902 mqd->queue_state.cp_hqd_pq_doorbell_control =
3903 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3904 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3905 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3906 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3907 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3908 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3909 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3910
3911 } else {
3912 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3913 }
3914 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3915 mqd->queue_state.cp_hqd_pq_doorbell_control);
3916
3917 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3918 rdev->ring[idx].wptr = 0;
3919 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3920 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3921 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3922 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3923
3924 /* set the vmid for the queue */
3925 mqd->queue_state.cp_hqd_vmid = 0;
3926 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3927
3928 /* activate the queue */
3929 mqd->queue_state.cp_hqd_active = 1;
3930 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3931
3932 cik_srbm_select(rdev, 0, 0, 0, 0);
3933		mutex_unlock(&rdev->srbm_mutex);
3934
3935 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3936 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3937
3938 rdev->ring[idx].ready = true;
3939 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3940 if (r)
3941 rdev->ring[idx].ready = false;
3942 }
3943
3944	return 0;
3945}
3946
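/*
 * Illustrative sketch (hypothetical helper): the DOORBELL_OFFSET field
 * programmed in cik_cp_compute_resume() above. The driver tracks the
 * doorbell in bytes (doorbell_page_num * PAGE_SIZE), but the register
 * field is in dwords, hence the /4.
 */
static inline u32 demo_doorbell_field(u32 doorbell_page_num)
{
	u32 byte_offset = doorbell_page_num * PAGE_SIZE;

	return byte_offset / 4; /* value handed to DOORBELL_OFFSET() */
}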
3947static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3948{
3949 cik_cp_gfx_enable(rdev, enable);
3950 cik_cp_compute_enable(rdev, enable);
3951}
3952
3953static int cik_cp_load_microcode(struct radeon_device *rdev)
3954{
3955 int r;
3956
3957 r = cik_cp_gfx_load_microcode(rdev);
3958 if (r)
3959 return r;
3960 r = cik_cp_compute_load_microcode(rdev);
3961 if (r)
3962 return r;
3963
3964 return 0;
3965}
3966
3967static void cik_cp_fini(struct radeon_device *rdev)
3968{
3969 cik_cp_gfx_fini(rdev);
3970 cik_cp_compute_fini(rdev);
3971}
3972
3973static int cik_cp_resume(struct radeon_device *rdev)
3974{
3975 int r;
3976
3977 /* Reset all cp blocks */
3978 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3979 RREG32(GRBM_SOFT_RESET);
3980 mdelay(15);
3981 WREG32(GRBM_SOFT_RESET, 0);
3982 RREG32(GRBM_SOFT_RESET);
3983
3984 r = cik_cp_load_microcode(rdev);
3985 if (r)
3986 return r;
3987
3988 r = cik_cp_gfx_resume(rdev);
3989 if (r)
3990 return r;
3991 r = cik_cp_compute_resume(rdev);
3992 if (r)
3993 return r;
3994
3995 return 0;
3996}
3997
3998static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3999{
4000 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4001 RREG32(GRBM_STATUS));
4002 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4003 RREG32(GRBM_STATUS2));
4004 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4005 RREG32(GRBM_STATUS_SE0));
4006 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4007 RREG32(GRBM_STATUS_SE1));
4008 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4009 RREG32(GRBM_STATUS_SE2));
4010 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4011 RREG32(GRBM_STATUS_SE3));
4012 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4013 RREG32(SRBM_STATUS));
4014 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4015 RREG32(SRBM_STATUS2));
4016 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4017 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4018 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4019 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4020	dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4021 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4022 RREG32(CP_STALLED_STAT1));
4023 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4024 RREG32(CP_STALLED_STAT2));
4025 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4026 RREG32(CP_STALLED_STAT3));
4027 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4028 RREG32(CP_CPF_BUSY_STAT));
4029 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4030 RREG32(CP_CPF_STALLED_STAT1));
4031 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4032 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4033 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4034 RREG32(CP_CPC_STALLED_STAT1));
4035 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4036}
4037
4038/**
4039 * cik_gpu_check_soft_reset - check which blocks are busy
4040 *
4041 * @rdev: radeon_device pointer
4042 *
4043 * Check which blocks are busy and return the relevant reset
4044 * mask to be used by cik_gpu_soft_reset().
4045 * Returns a mask of the blocks to be reset.
4046 */
4047u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4048{
4049 u32 reset_mask = 0;
4050 u32 tmp;
4051
4052 /* GRBM_STATUS */
4053 tmp = RREG32(GRBM_STATUS);
4054 if (tmp & (PA_BUSY | SC_BUSY |
4055 BCI_BUSY | SX_BUSY |
4056 TA_BUSY | VGT_BUSY |
4057 DB_BUSY | CB_BUSY |
4058 GDS_BUSY | SPI_BUSY |
4059 IA_BUSY | IA_BUSY_NO_DMA))
4060 reset_mask |= RADEON_RESET_GFX;
4061
4062 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4063 reset_mask |= RADEON_RESET_CP;
4064
4065 /* GRBM_STATUS2 */
4066 tmp = RREG32(GRBM_STATUS2);
4067 if (tmp & RLC_BUSY)
4068 reset_mask |= RADEON_RESET_RLC;
4069
4070 /* SDMA0_STATUS_REG */
4071 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4072 if (!(tmp & SDMA_IDLE))
4073 reset_mask |= RADEON_RESET_DMA;
4074
4075 /* SDMA1_STATUS_REG */
4076 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4077 if (!(tmp & SDMA_IDLE))
4078 reset_mask |= RADEON_RESET_DMA1;
4079
4080 /* SRBM_STATUS2 */
4081 tmp = RREG32(SRBM_STATUS2);
4082 if (tmp & SDMA_BUSY)
4083 reset_mask |= RADEON_RESET_DMA;
4084
4085 if (tmp & SDMA1_BUSY)
4086 reset_mask |= RADEON_RESET_DMA1;
4087
4088 /* SRBM_STATUS */
4089 tmp = RREG32(SRBM_STATUS);
4090
4091 if (tmp & IH_BUSY)
4092 reset_mask |= RADEON_RESET_IH;
4093
4094 if (tmp & SEM_BUSY)
4095 reset_mask |= RADEON_RESET_SEM;
4096
4097 if (tmp & GRBM_RQ_PENDING)
4098 reset_mask |= RADEON_RESET_GRBM;
4099
4100 if (tmp & VMC_BUSY)
4101 reset_mask |= RADEON_RESET_VMC;
4102
4103 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4104 MCC_BUSY | MCD_BUSY))
4105 reset_mask |= RADEON_RESET_MC;
4106
4107 if (evergreen_is_display_hung(rdev))
4108 reset_mask |= RADEON_RESET_DISPLAY;
4109
4110	/* Skip MC reset as it's most likely not hung, just busy */
4111 if (reset_mask & RADEON_RESET_MC) {
4112 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4113 reset_mask &= ~RADEON_RESET_MC;
4114 }
4115
4116 return reset_mask;
4117}
4118
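/*
 * Illustrative sketch (hypothetical helper, simplified to two of the
 * status checks above): each busy bit maps to one RADEON_RESET_* flag,
 * and MC busyness is dropped from the final mask on purpose.
 */
static inline u32 demo_reset_mask(u32 grbm_status, u32 srbm_status2)
{
	u32 mask = 0;

	if (grbm_status & (CP_BUSY | CP_COHERENCY_BUSY))
		mask |= RADEON_RESET_CP;
	if (srbm_status2 & (SDMA_BUSY | SDMA1_BUSY))
		mask |= RADEON_RESET_DMA;

	/* a busy MC is almost never a hung MC */
	mask &= ~RADEON_RESET_MC;
	return mask;
}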
4119/**
4120 * cik_gpu_soft_reset - soft reset GPU
4121 *
4122 * @rdev: radeon_device pointer
4123 * @reset_mask: mask of which blocks to reset
4124 *
4125 * Soft reset the blocks specified in @reset_mask.
4126 */
4127static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4128{
4129 struct evergreen_mc_save save;
4130 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4131 u32 tmp;
4132
4133 if (reset_mask == 0)
4134 return;
4135
4136 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4137
4138 cik_print_gpu_status_regs(rdev);
4139 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4140 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4141 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4142 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4143
4144 /* stop the rlc */
4145 cik_rlc_stop(rdev);
4146
4147 /* Disable GFX parsing/prefetching */
4148 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4149
4150 /* Disable MEC parsing/prefetching */
4151 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4152
4153 if (reset_mask & RADEON_RESET_DMA) {
4154 /* sdma0 */
4155 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4156 tmp |= SDMA_HALT;
4157 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4158 }
4159 if (reset_mask & RADEON_RESET_DMA1) {
4160 /* sdma1 */
4161 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4162 tmp |= SDMA_HALT;
4163 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4164 }
4165
4166 evergreen_mc_stop(rdev, &save);
4167 if (evergreen_mc_wait_for_idle(rdev)) {
4168		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4169 }
4170
4171 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4172 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4173
4174 if (reset_mask & RADEON_RESET_CP) {
4175 grbm_soft_reset |= SOFT_RESET_CP;
4176
4177 srbm_soft_reset |= SOFT_RESET_GRBM;
4178 }
4179
4180 if (reset_mask & RADEON_RESET_DMA)
4181 srbm_soft_reset |= SOFT_RESET_SDMA;
4182
4183 if (reset_mask & RADEON_RESET_DMA1)
4184 srbm_soft_reset |= SOFT_RESET_SDMA1;
4185
4186 if (reset_mask & RADEON_RESET_DISPLAY)
4187 srbm_soft_reset |= SOFT_RESET_DC;
4188
4189 if (reset_mask & RADEON_RESET_RLC)
4190 grbm_soft_reset |= SOFT_RESET_RLC;
4191
4192 if (reset_mask & RADEON_RESET_SEM)
4193 srbm_soft_reset |= SOFT_RESET_SEM;
4194
4195 if (reset_mask & RADEON_RESET_IH)
4196 srbm_soft_reset |= SOFT_RESET_IH;
4197
4198 if (reset_mask & RADEON_RESET_GRBM)
4199 srbm_soft_reset |= SOFT_RESET_GRBM;
4200
4201 if (reset_mask & RADEON_RESET_VMC)
4202 srbm_soft_reset |= SOFT_RESET_VMC;
4203
4204 if (!(rdev->flags & RADEON_IS_IGP)) {
4205 if (reset_mask & RADEON_RESET_MC)
4206 srbm_soft_reset |= SOFT_RESET_MC;
4207 }
4208
4209 if (grbm_soft_reset) {
4210 tmp = RREG32(GRBM_SOFT_RESET);
4211 tmp |= grbm_soft_reset;
4212 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4213 WREG32(GRBM_SOFT_RESET, tmp);
4214 tmp = RREG32(GRBM_SOFT_RESET);
4215
4216 udelay(50);
4217
4218 tmp &= ~grbm_soft_reset;
4219 WREG32(GRBM_SOFT_RESET, tmp);
4220 tmp = RREG32(GRBM_SOFT_RESET);
4221 }
4222
4223 if (srbm_soft_reset) {
4224 tmp = RREG32(SRBM_SOFT_RESET);
4225 tmp |= srbm_soft_reset;
4226 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4227 WREG32(SRBM_SOFT_RESET, tmp);
4228 tmp = RREG32(SRBM_SOFT_RESET);
4229
4230 udelay(50);
4231
4232 tmp &= ~srbm_soft_reset;
4233 WREG32(SRBM_SOFT_RESET, tmp);
4234 tmp = RREG32(SRBM_SOFT_RESET);
4235 }
4236
4237 /* Wait a little for things to settle down */
4238 udelay(50);
4239
4240 evergreen_mc_resume(rdev, &save);
4241 udelay(50);
4242
4243 cik_print_gpu_status_regs(rdev);
4244}
4245
4246/**
4247 * cik_asic_reset - soft reset GPU
4248 *
4249 * @rdev: radeon_device pointer
4250 *
4251 * Look up which blocks are hung and attempt
4252 * to reset them.
4253 * Returns 0 for success.
4254 */
4255int cik_asic_reset(struct radeon_device *rdev)
4256{
4257 u32 reset_mask;
4258
4259 reset_mask = cik_gpu_check_soft_reset(rdev);
4260
4261 if (reset_mask)
4262 r600_set_bios_scratch_engine_hung(rdev, true);
4263
4264 cik_gpu_soft_reset(rdev, reset_mask);
4265
4266 reset_mask = cik_gpu_check_soft_reset(rdev);
4267
4268 if (!reset_mask)
4269 r600_set_bios_scratch_engine_hung(rdev, false);
4270
4271 return 0;
4272}
4273
4274/**
4275 * cik_gfx_is_lockup - check if the 3D engine is locked up
4276 *
4277 * @rdev: radeon_device pointer
4278 * @ring: radeon_ring structure holding ring information
4279 *
4280 * Check if the 3D engine is locked up (CIK).
4281 * Returns true if the engine is locked, false if not.
4282 */
4283bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4284{
4285	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4286
4287	if (!(reset_mask & (RADEON_RESET_GFX |
4288			    RADEON_RESET_COMPUTE |
4289			    RADEON_RESET_CP))) {
4290		radeon_ring_lockup_update(ring);
4291 return false;
4292 }
4293 /* force CP activities */
4294 radeon_ring_force_activity(rdev, ring);
4295 return radeon_ring_test_lockup(rdev, ring);
4296}
4297
4298/* MC */
4299/**
4300 * cik_mc_program - program the GPU memory controller
4301 *
4302 * @rdev: radeon_device pointer
4303 *
4304 * Set the location of vram, gart, and AGP in the GPU's
4305 * physical address space (CIK).
4306 */
4307static void cik_mc_program(struct radeon_device *rdev)
4308{
4309 struct evergreen_mc_save save;
4310 u32 tmp;
4311 int i, j;
4312
4313 /* Initialize HDP */
4314 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4315 WREG32((0x2c14 + j), 0x00000000);
4316 WREG32((0x2c18 + j), 0x00000000);
4317 WREG32((0x2c1c + j), 0x00000000);
4318 WREG32((0x2c20 + j), 0x00000000);
4319 WREG32((0x2c24 + j), 0x00000000);
4320 }
4321 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4322
4323 evergreen_mc_stop(rdev, &save);
4324 if (radeon_mc_wait_for_idle(rdev)) {
4325		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4326 }
4327 /* Lockout access through VGA aperture*/
4328 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4329 /* Update configuration */
4330 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4331 rdev->mc.vram_start >> 12);
4332 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4333 rdev->mc.vram_end >> 12);
4334 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4335 rdev->vram_scratch.gpu_addr >> 12);
4336 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4337 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4338 WREG32(MC_VM_FB_LOCATION, tmp);
4339 /* XXX double check these! */
4340 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4341 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4342 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4343 WREG32(MC_VM_AGP_BASE, 0);
4344 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4345 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4346 if (radeon_mc_wait_for_idle(rdev)) {
4347		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4348 }
4349 evergreen_mc_resume(rdev, &save);
4350 /* we need to own VRAM, so turn off the VGA renderer here
4351 * to stop it overwriting our objects */
4352 rv515_vga_render_disable(rdev);
4353}
4354
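/*
 * Illustrative sketch (hypothetical helper): the MC_VM_FB_LOCATION
 * encoding programmed above packs the VRAM range as two 16-bit fields
 * in 16 MB (>> 24) granularity, end address in the high half and start
 * address in the low half.
 */
static inline u32 demo_fb_location(u64 vram_start, u64 vram_end)
{
	u32 tmp = ((vram_end >> 24) & 0xFFFF) << 16;

	tmp |= (vram_start >> 24) & 0xFFFF;
	return tmp;
}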
4355/**
4356 * cik_mc_init - initialize the memory controller driver params
4357 *
4358 * @rdev: radeon_device pointer
4359 *
4360 * Look up the amount of vram, vram width, and decide how to place
4361 * vram and gart within the GPU's physical address space (CIK).
4362 * Returns 0 for success.
4363 */
4364static int cik_mc_init(struct radeon_device *rdev)
4365{
4366 u32 tmp;
4367 int chansize, numchan;
4368
4369	/* Get VRAM information */
4370 rdev->mc.vram_is_ddr = true;
4371 tmp = RREG32(MC_ARB_RAMCFG);
4372 if (tmp & CHANSIZE_MASK) {
4373 chansize = 64;
4374 } else {
4375 chansize = 32;
4376 }
4377 tmp = RREG32(MC_SHARED_CHMAP);
4378 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4379 case 0:
4380 default:
4381 numchan = 1;
4382 break;
4383 case 1:
4384 numchan = 2;
4385 break;
4386 case 2:
4387 numchan = 4;
4388 break;
4389 case 3:
4390 numchan = 8;
4391 break;
4392 case 4:
4393 numchan = 3;
4394 break;
4395 case 5:
4396 numchan = 6;
4397 break;
4398 case 6:
4399 numchan = 10;
4400 break;
4401 case 7:
4402 numchan = 12;
4403 break;
4404 case 8:
4405 numchan = 16;
4406 break;
4407 }
4408 rdev->mc.vram_width = numchan * chansize;
4409	/* Could aper size report 0? */
4410 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4411 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4412	/* size in MB on si and cik */
4413 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4414 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4415 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4416 si_vram_gtt_location(rdev, &rdev->mc);
4417 radeon_update_bandwidth_info(rdev);
4418
4419 return 0;
4420}
4421
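/*
 * Illustrative sketch (hypothetical helper): the NOOFCHAN decode above
 * is a lookup rather than a raw count; encoding 4 means 3 channels,
 * encoding 6 means 10, and so on. Total bus width is the channel count
 * times the channel size (32 or 64 bits from MC_ARB_RAMCFG).
 */
static inline u32 demo_vram_width(u32 noofchan, u32 chansize)
{
	static const u32 numchan[] = { 1, 2, 4, 8, 3, 6, 10, 12, 16 };

	if (noofchan >= ARRAY_SIZE(numchan))
		noofchan = 0; /* mirror the default case: one channel */
	return numchan[noofchan] * chansize;
}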
4422/*
4423 * GART
4424 * VMID 0 is the physical GPU addresses as used by the kernel.
4425 * VMIDs 1-15 are used for userspace clients and are handled
4426 * by the radeon vm/hsa code.
4427 */
4428/**
4429 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4430 *
4431 * @rdev: radeon_device pointer
4432 *
4433 * Flush the TLB for the VMID 0 page table (CIK).
4434 */
4435void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4436{
4437 /* flush hdp cache */
4438 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4439
4440 /* bits 0-15 are the VM contexts0-15 */
4441 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4442}
4443
4444/**
4445 * cik_pcie_gart_enable - gart enable
4446 *
4447 * @rdev: radeon_device pointer
4448 *
4449 * This sets up the TLBs, programs the page tables for VMID0,
4450 * sets up the hw for VMIDs 1-15 which are allocated on
4451 * demand, and sets up the global locations for the LDS, GDS,
4452 * and GPUVM for FSA64 clients (CIK).
4453 * Returns 0 for success, errors for failure.
4454 */
4455static int cik_pcie_gart_enable(struct radeon_device *rdev)
4456{
4457 int r, i;
4458
4459 if (rdev->gart.robj == NULL) {
4460 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4461 return -EINVAL;
4462 }
4463 r = radeon_gart_table_vram_pin(rdev);
4464 if (r)
4465 return r;
4466 radeon_gart_restore(rdev);
4467 /* Setup TLB control */
4468 WREG32(MC_VM_MX_L1_TLB_CNTL,
4469 (0xA << 7) |
4470 ENABLE_L1_TLB |
4471 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4472 ENABLE_ADVANCED_DRIVER_MODEL |
4473 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4474 /* Setup L2 cache */
4475 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4476 ENABLE_L2_FRAGMENT_PROCESSING |
4477 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4478 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4479 EFFECTIVE_L2_QUEUE_SIZE(7) |
4480 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4481 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4482 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4483 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4484 /* setup context0 */
4485 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4486 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4487 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4488 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4489 (u32)(rdev->dummy_page.addr >> 12));
4490 WREG32(VM_CONTEXT0_CNTL2, 0);
4491 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4492 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4493
4494 WREG32(0x15D4, 0);
4495 WREG32(0x15D8, 0);
4496 WREG32(0x15DC, 0);
4497
4498 /* empty context1-15 */
4499 /* FIXME start with 4G, once using 2 level pt switch to full
4500 * vm size space
4501 */
4502 /* set vm size, must be a multiple of 4 */
4503 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4504 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4505 for (i = 1; i < 16; i++) {
4506 if (i < 8)
4507 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4508 rdev->gart.table_addr >> 12);
4509 else
4510 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4511 rdev->gart.table_addr >> 12);
4512 }
4513
4514 /* enable context1-15 */
4515 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4516 (u32)(rdev->dummy_page.addr >> 12));
4517	WREG32(VM_CONTEXT1_CNTL2, 4);
4518	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4519				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4520 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4521 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4522 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4523 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4524 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4525 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4526 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4527 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4528 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4529 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4530 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4531
4532 /* TC cache setup ??? */
4533 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4534 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4535 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4536
4537 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4538 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4539 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4540 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4541 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4542
4543 WREG32(TC_CFG_L1_VOLATILE, 0);
4544 WREG32(TC_CFG_L2_VOLATILE, 0);
4545
4546 if (rdev->family == CHIP_KAVERI) {
4547 u32 tmp = RREG32(CHUB_CONTROL);
4548 tmp &= ~BYPASS_VM;
4549 WREG32(CHUB_CONTROL, tmp);
4550 }
4551
4552 /* XXX SH_MEM regs */
4553 /* where to put LDS, scratch, GPUVM in FSA64 space */
4554	mutex_lock(&rdev->srbm_mutex);
4555	for (i = 0; i < 16; i++) {
4556		cik_srbm_select(rdev, 0, 0, 0, i);
4557		/* CP and shaders */
4558		WREG32(SH_MEM_CONFIG, 0);
4559 WREG32(SH_MEM_APE1_BASE, 1);
4560 WREG32(SH_MEM_APE1_LIMIT, 0);
4561 WREG32(SH_MEM_BASES, 0);
4562		/* SDMA GFX */
4563 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4564 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4565 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4566 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4567 /* XXX SDMA RLC - todo */
4568	}
4569	cik_srbm_select(rdev, 0, 0, 0, 0);
4570	mutex_unlock(&rdev->srbm_mutex);
4571
4572 cik_pcie_gart_tlb_flush(rdev);
4573 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4574 (unsigned)(rdev->mc.gtt_size >> 20),
4575 (unsigned long long)rdev->gart.table_addr);
4576 rdev->gart.ready = true;
4577 return 0;
4578}
4579
4580/**
4581 * cik_pcie_gart_disable - gart disable
4582 *
4583 * @rdev: radeon_device pointer
4584 *
4585 * This disables all VM page tables (CIK).
4586 */
4587static void cik_pcie_gart_disable(struct radeon_device *rdev)
4588{
4589 /* Disable all tables */
4590 WREG32(VM_CONTEXT0_CNTL, 0);
4591 WREG32(VM_CONTEXT1_CNTL, 0);
4592 /* Setup TLB control */
4593 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4594 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4595 /* Setup L2 cache */
4596 WREG32(VM_L2_CNTL,
4597 ENABLE_L2_FRAGMENT_PROCESSING |
4598 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4599 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4600 EFFECTIVE_L2_QUEUE_SIZE(7) |
4601 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4602 WREG32(VM_L2_CNTL2, 0);
4603 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4604 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4605 radeon_gart_table_vram_unpin(rdev);
4606}
4607
4608/**
4609 * cik_pcie_gart_fini - vm fini callback
4610 *
4611 * @rdev: radeon_device pointer
4612 *
4613 * Tears down the driver GART/VM setup (CIK).
4614 */
4615static void cik_pcie_gart_fini(struct radeon_device *rdev)
4616{
4617 cik_pcie_gart_disable(rdev);
4618 radeon_gart_table_vram_free(rdev);
4619 radeon_gart_fini(rdev);
4620}
4621
4622/* vm parser */
4623/**
4624 * cik_ib_parse - vm ib_parse callback
4625 *
4626 * @rdev: radeon_device pointer
4627 * @ib: indirect buffer pointer
4628 *
4629 * CIK uses hw IB checking so this is a nop (CIK).
4630 */
4631int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4632{
4633 return 0;
4634}
4635
4636/*
4637 * vm
4638 * VMID 0 is the physical GPU addresses as used by the kernel.
4639 * VMIDs 1-15 are used for userspace clients and are handled
4640 * by the radeon vm/hsa code.
4641 */
4642/**
4643 * cik_vm_init - cik vm init callback
4644 *
4645 * @rdev: radeon_device pointer
4646 *
4647 * Inits cik specific vm parameters (number of VMs, base of vram for
4648 * VMIDs 1-15) (CIK).
4649 * Returns 0 for success.
4650 */
4651int cik_vm_init(struct radeon_device *rdev)
4652{
4653 /* number of VMs */
4654 rdev->vm_manager.nvm = 16;
4655 /* base offset of vram pages */
4656 if (rdev->flags & RADEON_IS_IGP) {
4657 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4658 tmp <<= 22;
4659 rdev->vm_manager.vram_base_offset = tmp;
4660 } else
4661 rdev->vm_manager.vram_base_offset = 0;
4662
4663 return 0;
4664}
4665
4666/**
4667 * cik_vm_fini - cik vm fini callback
4668 *
4669 * @rdev: radeon_device pointer
4670 *
4671 * Tear down any asic specific VM setup (CIK).
4672 */
4673void cik_vm_fini(struct radeon_device *rdev)
4674{
4675}
4676
4677/**
4678 * cik_vm_decode_fault - print human readable fault info
4679 *
4680 * @rdev: radeon_device pointer
4681 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4682 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4683 *
4684 * Print human readable fault information (CIK).
4685 */
4686static void cik_vm_decode_fault(struct radeon_device *rdev,
4687 u32 status, u32 addr, u32 mc_client)
4688{
4689 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4690 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4691 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4692 char *block = (char *)&mc_client;
4693
4694 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4695 protections, vmid, addr,
4696 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4697 block, mc_id);
4698}
4699
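/*
 * Illustrative sketch (hypothetical helper): the char* cast above works
 * because the MCCLIENT register value is four ASCII characters naming
 * the offending client; a little-endian host is assumed, matching the
 * cast. Spelled out byte by byte:
 */
static inline void demo_mc_client_name(u32 mc_client, char out[5])
{
	out[0] = (char)(mc_client & 0xff);
	out[1] = (char)((mc_client >> 8) & 0xff);
	out[2] = (char)((mc_client >> 16) & 0xff);
	out[3] = (char)((mc_client >> 24) & 0xff);
	out[4] = '\0';
}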
4700/**
4701 * cik_vm_flush - cik vm flush using the CP
4702 *
4703 * @rdev: radeon_device pointer
4704 *
4705 * Update the page table base and flush the VM TLB
4706 * using the CP (CIK).
4707 */
4708void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4709{
4710 struct radeon_ring *ring = &rdev->ring[ridx];
4711
4712 if (vm == NULL)
4713 return;
4714
4715 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4716 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4717 WRITE_DATA_DST_SEL(0)));
4718 if (vm->id < 8) {
4719 radeon_ring_write(ring,
4720 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4721 } else {
4722 radeon_ring_write(ring,
4723 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4724 }
4725 radeon_ring_write(ring, 0);
4726 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4727
4728 /* update SH_MEM_* regs */
4729 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4730 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4731 WRITE_DATA_DST_SEL(0)));
4732 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4733 radeon_ring_write(ring, 0);
4734 radeon_ring_write(ring, VMID(vm->id));
4735
4736 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4737 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4738 WRITE_DATA_DST_SEL(0)));
4739 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4740 radeon_ring_write(ring, 0);
4741
4742 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4743 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4744 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4745 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4746
4747 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4748 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4749 WRITE_DATA_DST_SEL(0)));
4750 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4751 radeon_ring_write(ring, 0);
4752 radeon_ring_write(ring, VMID(0));
4753
4754 /* HDP flush */
4755 /* We should be using the WAIT_REG_MEM packet here like in
4756 * cik_fence_ring_emit(), but it causes the CP to hang in this
4757 * context...
4758 */
4759 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4760 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4761 WRITE_DATA_DST_SEL(0)));
4762 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4763 radeon_ring_write(ring, 0);
4764 radeon_ring_write(ring, 0);
4765
4766 /* bits 0-15 are the VM contexts0-15 */
4767 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4768 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4769 WRITE_DATA_DST_SEL(0)));
4770 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4771 radeon_ring_write(ring, 0);
4772 radeon_ring_write(ring, 1 << vm->id);
4773
4774	/* compute doesn't have PFP */
4775 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4776 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4777 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4778 radeon_ring_write(ring, 0x0);
4779 }
4780}
4781
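/*
 * Illustrative sketch (hypothetical helper): the VMID-to-register
 * mapping used in cik_vm_flush() above and in cik_pcie_gart_enable().
 * Contexts 0-7 and 8-15 live in two separate register banks, each
 * entry 4 bytes apart.
 */
static inline u32 demo_pt_base_reg(unsigned int vmid)
{
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}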
4782/**
4783 * cik_vm_set_page - update the page tables using sDMA
4784 *
4785 * @rdev: radeon_device pointer
4786 * @ib: indirect buffer to fill with commands
4787 * @pe: addr of the page entry
4788 * @addr: dst addr to write into pe
4789 * @count: number of page entries to update
4790 * @incr: increase next addr by incr bytes
4791 * @flags: access flags
4792 *
4793 * Update the page tables using CP or sDMA (CIK).
4794 */
4795void cik_vm_set_page(struct radeon_device *rdev,
4796 struct radeon_ib *ib,
4797 uint64_t pe,
4798 uint64_t addr, unsigned count,
4799 uint32_t incr, uint32_t flags)
4800{
4801 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4802 uint64_t value;
4803 unsigned ndw;
4804
4805 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4806 /* CP */
4807 while (count) {
4808 ndw = 2 + count * 2;
4809 if (ndw > 0x3FFE)
4810 ndw = 0x3FFE;
4811
4812 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4813 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4814 WRITE_DATA_DST_SEL(1));
4815 ib->ptr[ib->length_dw++] = pe;
4816 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4817 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4818 if (flags & RADEON_VM_PAGE_SYSTEM) {
4819 value = radeon_vm_map_gart(rdev, addr);
4820 value &= 0xFFFFFFFFFFFFF000ULL;
4821 } else if (flags & RADEON_VM_PAGE_VALID) {
4822 value = addr;
4823 } else {
4824 value = 0;
4825 }
4826 addr += incr;
4827 value |= r600_flags;
4828 ib->ptr[ib->length_dw++] = value;
4829 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4830 }
4831 }
4832 } else {
4833 /* DMA */
4834		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4835	}
4836}
4837
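/*
 * Illustrative sketch (hypothetical helper): the WRITE_DATA chunking
 * in the CP path above. A packet carries at most 0x3FFE dwords, and
 * every PTE costs two dwords on top of the two-dword destination
 * header, so large updates are split across several packets.
 */
static inline unsigned demo_write_data_ndw(unsigned count)
{
	unsigned ndw = 2 + count * 2;

	return (ndw > 0x3FFE) ? 0x3FFE : ndw;
}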
4838/*
4839 * RLC
4840 * The RLC is a multi-purpose microengine that handles a
4841 * variety of functions, the most important of which is
4842 * the interrupt controller.
4843 */
4844static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4845					  bool enable)
4846{
4847	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4848
4849	if (enable)
4850 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4851 else
4852 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4853	WREG32(CP_INT_CNTL_RING0, tmp);
4854}
4855
4856static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4857{
4858 u32 tmp;
4859
4860	tmp = RREG32(RLC_LB_CNTL);
4861 if (enable)
4862 tmp |= LOAD_BALANCE_ENABLE;
4863 else
4864 tmp &= ~LOAD_BALANCE_ENABLE;
4865 WREG32(RLC_LB_CNTL, tmp);
4866}
4867
4868static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4869{
4870 u32 i, j, k;
4871 u32 mask;
4872
4873 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4874 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4875 cik_select_se_sh(rdev, i, j);
4876 for (k = 0; k < rdev->usec_timeout; k++) {
4877 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4878 break;
4879 udelay(1);
4880 }
4881 }
4882 }
4883 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4884
4885 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4886 for (k = 0; k < rdev->usec_timeout; k++) {
4887 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4888 break;
4889 udelay(1);
4890 }
4891}
4892
4893static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4894{
4895 u32 tmp;
4896
4897 tmp = RREG32(RLC_CNTL);
4898 if (tmp != rlc)
4899 WREG32(RLC_CNTL, rlc);
4900}
4901
4902static u32 cik_halt_rlc(struct radeon_device *rdev)
4903{
4904 u32 data, orig;
4905
4906 orig = data = RREG32(RLC_CNTL);
4907
4908 if (data & RLC_ENABLE) {
4909 u32 i;
4910
4911 data &= ~RLC_ENABLE;
4912 WREG32(RLC_CNTL, data);
4913
4914 for (i = 0; i < rdev->usec_timeout; i++) {
4915 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4916 break;
4917 udelay(1);
4918 }
4919
4920 cik_wait_for_rlc_serdes(rdev);
4921 }
4922
4923 return orig;
4924}
4925
4926void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4927{
4928 u32 tmp, i, mask;
4929
4930 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4931 WREG32(RLC_GPR_REG2, tmp);
4932
4933 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4934 for (i = 0; i < rdev->usec_timeout; i++) {
4935 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4936 break;
4937 udelay(1);
4938 }
4939
4940 for (i = 0; i < rdev->usec_timeout; i++) {
4941 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
4942 break;
4943 udelay(1);
4944 }
4945}
4946
4947void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4948{
4949 u32 tmp;
4950
4951 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4952 WREG32(RLC_GPR_REG2, tmp);
4953}
4954
4955/**
4956 * cik_rlc_stop - stop the RLC ME
4957 *
4958 * @rdev: radeon_device pointer
4959 *
4960 * Halt the RLC ME (MicroEngine) (CIK).
4961 */
4962static void cik_rlc_stop(struct radeon_device *rdev)
4963{
4964	WREG32(RLC_CNTL, 0);
4965
4966 cik_enable_gui_idle_interrupt(rdev, false);
4967
4968	cik_wait_for_rlc_serdes(rdev);
4969}
4970
4971/**
Alex Deucherf6796ca2012-11-09 10:44:08 -05004972 * cik_rlc_start - start the RLC ME
4973 *
4974 * @rdev: radeon_device pointer
4975 *
4976 * Unhalt the RLC ME (MicroEngine) (CIK).
4977 */
4978static void cik_rlc_start(struct radeon_device *rdev)
4979{
4980	WREG32(RLC_CNTL, RLC_ENABLE);
4981
4982	cik_enable_gui_idle_interrupt(rdev, true);
4983
4984 udelay(50);
4985}
4986
4987/**
4988 * cik_rlc_resume - setup the RLC hw
4989 *
4990 * @rdev: radeon_device pointer
4991 *
4992 * Initialize the RLC registers, load the ucode,
4993 * and start the RLC (CIK).
4994 * Returns 0 for success, -EINVAL if the ucode is not available.
4995 */
4996static int cik_rlc_resume(struct radeon_device *rdev)
4997{
4998	u32 i, size, tmp;
4999	const __be32 *fw_data;
5000
5001 if (!rdev->rlc_fw)
5002 return -EINVAL;
5003
5004 switch (rdev->family) {
5005 case CHIP_BONAIRE:
5006 default:
5007 size = BONAIRE_RLC_UCODE_SIZE;
5008 break;
5009 case CHIP_KAVERI:
5010 size = KV_RLC_UCODE_SIZE;
5011 break;
5012 case CHIP_KABINI:
5013 size = KB_RLC_UCODE_SIZE;
5014 break;
5015 }
5016
5017 cik_rlc_stop(rdev);
5018
5019	/* disable CG */
5020 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5021 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5022
5023	si_rlc_reset(rdev);
5024
5025	cik_init_pg(rdev);
5026
5027 cik_init_cg(rdev);
5028
5029	WREG32(RLC_LB_CNTR_INIT, 0);
5030 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5031
5032 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5033 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5034 WREG32(RLC_LB_PARAMS, 0x00600408);
5035 WREG32(RLC_LB_CNTL, 0x80000004);
5036
5037 WREG32(RLC_MC_CNTL, 0);
5038 WREG32(RLC_UCODE_CNTL, 0);
5039
5040 fw_data = (const __be32 *)rdev->rlc_fw->data;
5041 WREG32(RLC_GPM_UCODE_ADDR, 0);
5042 for (i = 0; i < size; i++)
5043 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5044 WREG32(RLC_GPM_UCODE_ADDR, 0);
5045
5046	/* XXX - find out what chips support lbpw */
5047 cik_enable_lbpw(rdev, false);
5048
5049	if (rdev->family == CHIP_BONAIRE)
5050 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5051
5052 cik_rlc_start(rdev);
5053
5054 return 0;
5055}
5056
5057static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5058{
5059 u32 data, orig, tmp, tmp2;
5060
5061 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5062
5063 cik_enable_gui_idle_interrupt(rdev, enable);
5064
5065 if (enable) {
5066 tmp = cik_halt_rlc(rdev);
5067
5068 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5069 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5070 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5071 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5072 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5073
5074 cik_update_rlc(rdev, tmp);
5075
5076 data |= CGCG_EN | CGLS_EN;
5077 } else {
5078 RREG32(CB_CGTT_SCLK_CTRL);
5079 RREG32(CB_CGTT_SCLK_CTRL);
5080 RREG32(CB_CGTT_SCLK_CTRL);
5081 RREG32(CB_CGTT_SCLK_CTRL);
5082
5083 data &= ~(CGCG_EN | CGLS_EN);
5084 }
5085
5086 if (orig != data)
5087 WREG32(RLC_CGCG_CGLS_CTRL, data);
5088
5089}
5090
5091static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5092{
5093 u32 data, orig, tmp = 0;
5094
5095 if (enable) {
5096 orig = data = RREG32(CP_MEM_SLP_CNTL);
5097 data |= CP_MEM_LS_EN;
5098 if (orig != data)
5099 WREG32(CP_MEM_SLP_CNTL, data);
5100
5101 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5102 data &= 0xfffffffd;
5103 if (orig != data)
5104 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5105
5106 tmp = cik_halt_rlc(rdev);
5107
5108 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5109 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5110 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5111 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5112 WREG32(RLC_SERDES_WR_CTRL, data);
5113
5114 cik_update_rlc(rdev, tmp);
5115
5116 orig = data = RREG32(CGTS_SM_CTRL_REG);
5117 data &= ~SM_MODE_MASK;
5118 data |= SM_MODE(0x2);
5119 data |= SM_MODE_ENABLE;
5120 data &= ~CGTS_OVERRIDE;
5121 data &= ~CGTS_LS_OVERRIDE;
5122 data &= ~ON_MONITOR_ADD_MASK;
5123 data |= ON_MONITOR_ADD_EN;
5124 data |= ON_MONITOR_ADD(0x96);
5125 if (orig != data)
5126 WREG32(CGTS_SM_CTRL_REG, data);
5127 } else {
5128 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5129 data |= 0x00000002;
5130 if (orig != data)
5131 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5132
5133 data = RREG32(RLC_MEM_SLP_CNTL);
5134 if (data & RLC_MEM_LS_EN) {
5135 data &= ~RLC_MEM_LS_EN;
5136 WREG32(RLC_MEM_SLP_CNTL, data);
5137 }
5138
5139 data = RREG32(CP_MEM_SLP_CNTL);
5140 if (data & CP_MEM_LS_EN) {
5141 data &= ~CP_MEM_LS_EN;
5142 WREG32(CP_MEM_SLP_CNTL, data);
5143 }
5144
5145 orig = data = RREG32(CGTS_SM_CTRL_REG);
5146 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5147 if (orig != data)
5148 WREG32(CGTS_SM_CTRL_REG, data);
5149
5150 tmp = cik_halt_rlc(rdev);
5151
5152 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5153 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5154 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5155 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5156 WREG32(RLC_SERDES_WR_CTRL, data);
5157
5158 cik_update_rlc(rdev, tmp);
5159 }
5160}
5161
5162static const u32 mc_cg_registers[] =
5163{
5164 MC_HUB_MISC_HUB_CG,
5165 MC_HUB_MISC_SIP_CG,
5166 MC_HUB_MISC_VM_CG,
5167 MC_XPB_CLK_GAT,
5168 ATC_MISC_CG,
5169 MC_CITF_MISC_WR_CG,
5170 MC_CITF_MISC_RD_CG,
5171 MC_CITF_MISC_VM_CG,
5172 VM_L2_CG,
5173};
5174
5175static void cik_enable_mc_ls(struct radeon_device *rdev,
5176 bool enable)
5177{
5178 int i;
5179 u32 orig, data;
5180
5181 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5182 orig = data = RREG32(mc_cg_registers[i]);
5183 if (enable)
5184 data |= MC_LS_ENABLE;
5185 else
5186 data &= ~MC_LS_ENABLE;
5187 if (data != orig)
5188 WREG32(mc_cg_registers[i], data);
5189 }
5190}
5191
5192static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5193 bool enable)
5194{
5195 int i;
5196 u32 orig, data;
5197
5198 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5199 orig = data = RREG32(mc_cg_registers[i]);
5200 if (enable)
5201 data |= MC_CG_ENABLE;
5202 else
5203 data &= ~MC_CG_ENABLE;
5204 if (data != orig)
5205 WREG32(mc_cg_registers[i], data);
5206 }
5207}
5208
5209static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5210 bool enable)
5211{
5212 u32 orig, data;
5213
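	/* opaque hardware constants: 0x00000100 appears to ungate the SDMA
	 * clocks, while setting the top byte (0xff000000) forces the gating
	 * overrides on; the individual bit meanings are not spelled out
	 * anywhere in this file
	 */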
5214 if (enable) {
5215 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5216 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5217 } else {
5218 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5219 data |= 0xff000000;
5220 if (data != orig)
5221 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5222
5223 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5224 data |= 0xff000000;
5225 if (data != orig)
5226 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5227 }
5228}
5229
5230static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5231 bool enable)
5232{
5233 u32 orig, data;
5234
5235 if (enable) {
5236 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5237 data |= 0x100;
5238 if (orig != data)
5239 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5240
5241 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5242 data |= 0x100;
5243 if (orig != data)
5244 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5245 } else {
5246 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5247 data &= ~0x100;
5248 if (orig != data)
5249 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5250
5251 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5252 data &= ~0x100;
5253 if (orig != data)
5254 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5255 }
5256}
5257
5258static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5259 bool enable)
5260{
5261 u32 orig, data;
5262
5263 if (enable) {
5264 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5265		data |= 0xfff; /* enable all mem clock gates; mirrors the ~0xfff clear in the disable path */
5266 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5267
5268 orig = data = RREG32(UVD_CGC_CTRL);
5269 data |= DCM;
5270 if (orig != data)
5271 WREG32(UVD_CGC_CTRL, data);
5272 } else {
5273 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5274 data &= ~0xfff;
5275 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5276
5277 orig = data = RREG32(UVD_CGC_CTRL);
5278 data &= ~DCM;
5279 if (orig != data)
5280 WREG32(UVD_CGC_CTRL, data);
5281 }
5282}
5283
5284static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5285 bool enable)
5286{
5287 u32 orig, data;
5288
5289 orig = data = RREG32(HDP_HOST_PATH_CNTL);
5290
5291 if (enable)
5292 data &= ~CLOCK_GATING_DIS;
5293 else
5294 data |= CLOCK_GATING_DIS;
5295
5296 if (orig != data)
5297 WREG32(HDP_HOST_PATH_CNTL, data);
5298}
5299
5300static void cik_enable_hdp_ls(struct radeon_device *rdev,
5301 bool enable)
5302{
5303 u32 orig, data;
5304
5305 orig = data = RREG32(HDP_MEM_POWER_LS);
5306
5307 if (enable)
5308 data |= HDP_LS_ENABLE;
5309 else
5310 data &= ~HDP_LS_ENABLE;
5311
5312 if (orig != data)
5313 WREG32(HDP_MEM_POWER_LS, data);
5314}
5315
5316void cik_update_cg(struct radeon_device *rdev,
5317 u32 block, bool enable)
5318{
5319 if (block & RADEON_CG_BLOCK_GFX) {
5320 /* order matters! */
5321 if (enable) {
5322 cik_enable_mgcg(rdev, true);
5323 cik_enable_cgcg(rdev, true);
5324 } else {
5325 cik_enable_cgcg(rdev, false);
5326 cik_enable_mgcg(rdev, false);
5327 }
5328 }
5329
5330 if (block & RADEON_CG_BLOCK_MC) {
5331 if (!(rdev->flags & RADEON_IS_IGP)) {
5332 cik_enable_mc_mgcg(rdev, enable);
5333 cik_enable_mc_ls(rdev, enable);
5334 }
5335 }
5336
5337 if (block & RADEON_CG_BLOCK_SDMA) {
5338 cik_enable_sdma_mgcg(rdev, enable);
5339 cik_enable_sdma_mgls(rdev, enable);
5340 }
5341
5342 if (block & RADEON_CG_BLOCK_UVD) {
5343 if (rdev->has_uvd)
5344 cik_enable_uvd_mgcg(rdev, enable);
5345 }
5346
5347 if (block & RADEON_CG_BLOCK_HDP) {
5348 cik_enable_hdp_mgcg(rdev, enable);
5349 cik_enable_hdp_ls(rdev, enable);
5350 }
5351}
5352
5353static void cik_init_cg(struct radeon_device *rdev)
5354{
5355
5356 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */
5357
5358 if (rdev->has_uvd)
5359 si_init_uvd_internal_cg(rdev);
5360
5361 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5362 RADEON_CG_BLOCK_SDMA |
5363 RADEON_CG_BLOCK_UVD |
5364 RADEON_CG_BLOCK_HDP), true);
5365}
5366
5367static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5368 bool enable)
5369{
5370 u32 data, orig;
5371
5372 orig = data = RREG32(RLC_PG_CNTL);
5373 if (enable)
5374 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5375 else
5376 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5377 if (orig != data)
5378 WREG32(RLC_PG_CNTL, data);
5379}
5380
5381static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5382 bool enable)
5383{
5384 u32 data, orig;
5385
5386 orig = data = RREG32(RLC_PG_CNTL);
5387 if (enable)
5388 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5389 else
5390 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5391 if (orig != data)
5392 WREG32(RLC_PG_CNTL, data);
5393}
5394
5395static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5396{
5397 u32 data, orig;
5398
5399 orig = data = RREG32(RLC_PG_CNTL);
5400 if (enable)
5401 data &= ~DISABLE_CP_PG;
5402 else
5403 data |= DISABLE_CP_PG;
5404 if (orig != data)
5405 WREG32(RLC_PG_CNTL, data);
5406}
5407
5408static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5409{
5410 u32 data, orig;
5411
5412 orig = data = RREG32(RLC_PG_CNTL);
5413 if (enable)
5414 data &= ~DISABLE_GDS_PG;
5415 else
5416 data |= DISABLE_GDS_PG;
5417 if (orig != data)
5418 WREG32(RLC_PG_CNTL, data);
5419}
5420
5421#define CP_ME_TABLE_SIZE 96
5422#define CP_ME_TABLE_OFFSET 2048
5423#define CP_MEC_TABLE_OFFSET 4096
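/* the offsets above are in 32-bit words within the ucode images: each ME
 * contributes one CP_ME_TABLE_SIZE-dword block to the power gating table,
 * copied out of its firmware image by cik_init_cp_pg_table() below
 */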
5424
5425void cik_init_cp_pg_table(struct radeon_device *rdev)
5426{
5427 const __be32 *fw_data;
5428 volatile u32 *dst_ptr;
5429 int me, i, max_me = 4;
5430 u32 bo_offset = 0;
5431 u32 table_offset;
5432
5433 if (rdev->family == CHIP_KAVERI)
5434 max_me = 5;
5435
5436 if (rdev->rlc.cp_table_ptr == NULL)
5437 return;
5438
5439 /* write the cp table buffer */
5440 dst_ptr = rdev->rlc.cp_table_ptr;
5441 for (me = 0; me < max_me; me++) {
5442 if (me == 0) {
5443 fw_data = (const __be32 *)rdev->ce_fw->data;
5444 table_offset = CP_ME_TABLE_OFFSET;
5445 } else if (me == 1) {
5446 fw_data = (const __be32 *)rdev->pfp_fw->data;
5447 table_offset = CP_ME_TABLE_OFFSET;
5448 } else if (me == 2) {
5449 fw_data = (const __be32 *)rdev->me_fw->data;
5450 table_offset = CP_ME_TABLE_OFFSET;
5451 } else {
5452 fw_data = (const __be32 *)rdev->mec_fw->data;
5453 table_offset = CP_MEC_TABLE_OFFSET;
5454 }
5455
5456		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
5457 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5458 }
5459 bo_offset += CP_ME_TABLE_SIZE;
5460 }
5461}
5462
5463static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5464 bool enable)
5465{
5466 u32 data, orig;
5467
5468 if (enable) {
5469 orig = data = RREG32(RLC_PG_CNTL);
5470 data |= GFX_PG_ENABLE;
5471 if (orig != data)
5472 WREG32(RLC_PG_CNTL, data);
5473
5474 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5475 data |= AUTO_PG_EN;
5476 if (orig != data)
5477 WREG32(RLC_AUTO_PG_CTRL, data);
5478 } else {
5479 orig = data = RREG32(RLC_PG_CNTL);
5480 data &= ~GFX_PG_ENABLE;
5481 if (orig != data)
5482 WREG32(RLC_PG_CNTL, data);
5483
5484 orig = data = RREG32(RLC_AUTO_PG_CTRL);
5485 data &= ~AUTO_PG_EN;
5486 if (orig != data)
5487 WREG32(RLC_AUTO_PG_CTRL, data);
5488
5489		data = RREG32(DB_RENDER_CONTROL); /* dummy read; the value is intentionally unused */
5490 }
5491}
5492
5493static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5494{
5495 u32 mask = 0, tmp, tmp1;
5496 int i;
5497
5498 cik_select_se_sh(rdev, se, sh);
5499 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5500 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5501 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5502
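	/* the upper 16 bits of both config registers hold the inactive-CU
	 * bits (hardware-fused and driver-disabled CUs, by all appearances);
	 * OR them together, shift them down, and the complement below is
	 * the bitmap of active CUs in this SE/SH
	 */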
5503 tmp &= 0xffff0000;
5504
5505 tmp |= tmp1;
5506 tmp >>= 16;
5507
5508	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
5509 mask <<= 1;
5510 mask |= 1;
5511 }
5512
5513 return (~tmp) & mask;
5514}
5515
5516static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5517{
5518 u32 i, j, k, active_cu_number = 0;
5519 u32 mask, counter, cu_bitmap;
5520 u32 tmp = 0;
5521
5522 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5523 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5524 mask = 1;
5525 cu_bitmap = 0;
5526 counter = 0;
5527			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
5528 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5529 if (counter < 2)
5530 cu_bitmap |= mask;
5531					counter++;
5532 }
5533 mask <<= 1;
5534 }
5535
5536 active_cu_number += counter;
5537 tmp |= (cu_bitmap << (i * 16 + j * 8));
5538 }
5539 }
5540
5541 WREG32(RLC_PG_AO_CU_MASK, tmp);
5542
5543 tmp = RREG32(RLC_MAX_PG_CU);
5544 tmp &= ~MAX_PU_CU_MASK;
5545 tmp |= MAX_PU_CU(active_cu_number);
5546 WREG32(RLC_MAX_PG_CU, tmp);
5547}
5548
5549static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5550 bool enable)
5551{
5552 u32 data, orig;
5553
5554 orig = data = RREG32(RLC_PG_CNTL);
5555 if (enable)
5556 data |= STATIC_PER_CU_PG_ENABLE;
5557 else
5558 data &= ~STATIC_PER_CU_PG_ENABLE;
5559 if (orig != data)
5560 WREG32(RLC_PG_CNTL, data);
5561}
5562
5563static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5564 bool enable)
5565{
5566 u32 data, orig;
5567
5568 orig = data = RREG32(RLC_PG_CNTL);
5569 if (enable)
5570 data |= DYN_PER_CU_PG_ENABLE;
5571 else
5572 data &= ~DYN_PER_CU_PG_ENABLE;
5573 if (orig != data)
5574 WREG32(RLC_PG_CNTL, data);
5575}
5576
5577#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5578#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
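/* dword indices into the RLC GPM scratch space; cik_init_gfx_cgpg() below
 * points RLC_GPM_SCRATCH_ADDR at these slots before streaming in the clear
 * state descriptor and the save/restore register list
 */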
5579
5580static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5581{
5582 u32 data, orig;
5583 u32 i;
5584
5585 if (rdev->rlc.cs_data) {
5586 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5587 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5588 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
5589 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
5590 } else {
5591 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5592 for (i = 0; i < 3; i++)
5593 WREG32(RLC_GPM_SCRATCH_DATA, 0);
5594 }
5595 if (rdev->rlc.reg_list) {
5596 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5597 for (i = 0; i < rdev->rlc.reg_list_size; i++)
5598 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5599 }
5600
5601 orig = data = RREG32(RLC_PG_CNTL);
5602 data |= GFX_PG_SRC;
5603 if (orig != data)
5604 WREG32(RLC_PG_CNTL, data);
5605
5606 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5607 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5608
5609 data = RREG32(CP_RB_WPTR_POLL_CNTL);
5610 data &= ~IDLE_POLL_COUNT_MASK;
5611 data |= IDLE_POLL_COUNT(0x60);
5612 WREG32(CP_RB_WPTR_POLL_CNTL, data);
5613
5614 data = 0x10101010;
5615 WREG32(RLC_PG_DELAY, data);
5616
5617 data = RREG32(RLC_PG_DELAY_2);
5618 data &= ~0xff;
5619 data |= 0x3;
5620 WREG32(RLC_PG_DELAY_2, data);
5621
5622 data = RREG32(RLC_AUTO_PG_CTRL);
5623 data &= ~GRBM_REG_SGIT_MASK;
5624 data |= GRBM_REG_SGIT(0x700);
5625 WREG32(RLC_AUTO_PG_CTRL, data);
5626
5627}
5628
5629static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5630{
5631 bool has_pg = false;
5632 bool has_dyn_mgpg = false;
5633 bool has_static_mgpg = false;
5634
5635 /* only APUs have PG */
5636 if (rdev->flags & RADEON_IS_IGP) {
5637 has_pg = true;
5638 has_static_mgpg = true;
5639 if (rdev->family == CHIP_KAVERI)
5640 has_dyn_mgpg = true;
5641 }
5642
5643 if (has_pg) {
5644 cik_enable_gfx_cgpg(rdev, enable);
5645 if (enable) {
5646 cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
5647 cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
5648 } else {
5649 cik_enable_gfx_static_mgpg(rdev, false);
5650 cik_enable_gfx_dynamic_mgpg(rdev, false);
5651 }
5652 }
5653
5654}
5655
5656void cik_init_pg(struct radeon_device *rdev)
5657{
5658 bool has_pg = false;
5659
5660 /* only APUs have PG */
5661 if (rdev->flags & RADEON_IS_IGP) {
5662 /* XXX disable this for now */
5663 /* has_pg = true; */
5664 }
5665
5666 if (has_pg) {
5667 cik_enable_sck_slowdown_on_pu(rdev, true);
5668 cik_enable_sck_slowdown_on_pd(rdev, true);
5669 cik_init_gfx_cgpg(rdev);
5670 cik_enable_cp_pg(rdev, true);
5671 cik_enable_gds_pg(rdev, true);
5672 cik_init_ao_cu_mask(rdev);
5673 cik_update_gfx_pg(rdev, true);
5674 }
5675}
5676
Alex Deuchera59781b2012-11-09 10:45:57 -05005677/*
5678 * Interrupts
5679 * Starting with r6xx, interrupts are handled via a ring buffer.
5680 * Ring buffers are areas of GPU accessible memory that the GPU
5681 * writes interrupt vectors into and the host reads vectors out of.
5682 * There is a rptr (read pointer) that determines where the
5683 * host is currently reading, and a wptr (write pointer)
5684 * which determines where the GPU has written. When the
5685 * pointers are equal, the ring is idle. When the GPU
5686 * writes vectors to the ring buffer, it increments the
5687 * wptr. When there is an interrupt, the host then starts
5688 * fetching vectors and processing them until the pointers are
5689 * equal again at which point it updates the rptr.
5690 */
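/* In outline, the host side of that protocol is just:
 *
 *	wptr = cik_get_ih_wptr(rdev);
 *	while (rptr != wptr) {
 *		handle the 16-byte vector at ring[rptr];
 *		rptr = (rptr + 16) & ptr_mask;
 *	}
 *	write rptr back to IH_RB_RPTR and re-check wptr for races;
 *
 * which is what cik_irq_process() below implements, plus overflow
 * handling and display interrupt acking.
 */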
5691
5692/**
5693 * cik_enable_interrupts - Enable the interrupt ring buffer
5694 *
5695 * @rdev: radeon_device pointer
5696 *
5697 * Enable the interrupt ring buffer (CIK).
5698 */
5699static void cik_enable_interrupts(struct radeon_device *rdev)
5700{
5701 u32 ih_cntl = RREG32(IH_CNTL);
5702 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5703
5704 ih_cntl |= ENABLE_INTR;
5705 ih_rb_cntl |= IH_RB_ENABLE;
5706 WREG32(IH_CNTL, ih_cntl);
5707 WREG32(IH_RB_CNTL, ih_rb_cntl);
5708 rdev->ih.enabled = true;
5709}
5710
5711/**
5712 * cik_disable_interrupts - Disable the interrupt ring buffer
5713 *
5714 * @rdev: radeon_device pointer
5715 *
5716 * Disable the interrupt ring buffer (CIK).
5717 */
5718static void cik_disable_interrupts(struct radeon_device *rdev)
5719{
5720 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5721 u32 ih_cntl = RREG32(IH_CNTL);
5722
5723 ih_rb_cntl &= ~IH_RB_ENABLE;
5724 ih_cntl &= ~ENABLE_INTR;
5725 WREG32(IH_RB_CNTL, ih_rb_cntl);
5726 WREG32(IH_CNTL, ih_cntl);
5727 /* set rptr, wptr to 0 */
5728 WREG32(IH_RB_RPTR, 0);
5729 WREG32(IH_RB_WPTR, 0);
5730 rdev->ih.enabled = false;
5731 rdev->ih.rptr = 0;
5732}
5733
5734/**
5735 * cik_disable_interrupt_state - Disable all interrupt sources
5736 *
5737 * @rdev: radeon_device pointer
5738 *
5739 * Clear all interrupt enable bits used by the driver (CIK).
5740 */
5741static void cik_disable_interrupt_state(struct radeon_device *rdev)
5742{
5743 u32 tmp;
5744
5745 /* gfx ring */
5746 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04005747 /* sdma */
5748 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5749 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5750 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5751 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05005752 /* compute queues */
5753 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5754 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5755 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5756 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5757 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5758 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5759 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5760 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5761 /* grbm */
5762 WREG32(GRBM_INT_CNTL, 0);
5763 /* vline/vblank, etc. */
5764 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5765 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5766 if (rdev->num_crtc >= 4) {
5767 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5768 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5769 }
5770 if (rdev->num_crtc >= 6) {
5771 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5772 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5773 }
5774
5775 /* dac hotplug */
5776 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5777
5778 /* digital hotplug */
5779 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5780 WREG32(DC_HPD1_INT_CONTROL, tmp);
5781 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5782 WREG32(DC_HPD2_INT_CONTROL, tmp);
5783 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5784 WREG32(DC_HPD3_INT_CONTROL, tmp);
5785 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5786 WREG32(DC_HPD4_INT_CONTROL, tmp);
5787 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5788 WREG32(DC_HPD5_INT_CONTROL, tmp);
5789 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5790 WREG32(DC_HPD6_INT_CONTROL, tmp);
5791
5792}
5793
5794/**
5795 * cik_irq_init - init and enable the interrupt ring
5796 *
5797 * @rdev: radeon_device pointer
5798 *
5799 * Allocate a ring buffer for the interrupt controller,
5800 * enable the RLC, disable interrupts, set up the IH
5801 * ring buffer and enable it (CIK).
5802 * Called at device load and resume.
5803 * Returns 0 for success, errors for failure.
5804 */
5805static int cik_irq_init(struct radeon_device *rdev)
5806{
5807 int ret = 0;
5808 int rb_bufsz;
5809 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5810
5811 /* allocate ring */
5812 ret = r600_ih_ring_alloc(rdev);
5813 if (ret)
5814 return ret;
5815
5816 /* disable irqs */
5817 cik_disable_interrupts(rdev);
5818
5819 /* init rlc */
5820 ret = cik_rlc_resume(rdev);
5821 if (ret) {
5822 r600_ih_ring_fini(rdev);
5823 return ret;
5824 }
5825
5826 /* setup interrupt control */
5827 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5828 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5829 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5830 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5831 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5832 */
5833 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5834 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5835 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5836 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5837
5838 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5839 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5840
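	/* rb_bufsz is log2 of the ring size in dwords; shifting it left by
	 * one presumably lands it in the RB_SIZE field of IH_RB_CNTL, the
	 * same way the r600-era IH code programs this register
	 */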
5841 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5842 IH_WPTR_OVERFLOW_CLEAR |
5843 (rb_bufsz << 1));
5844
5845 if (rdev->wb.enabled)
5846 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5847
5848 /* set the writeback address whether it's enabled or not */
5849 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5850 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5851
5852 WREG32(IH_RB_CNTL, ih_rb_cntl);
5853
5854 /* set rptr, wptr to 0 */
5855 WREG32(IH_RB_RPTR, 0);
5856 WREG32(IH_RB_WPTR, 0);
5857
5858 /* Default settings for IH_CNTL (disabled at first) */
5859 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5860 /* RPTR_REARM only works if msi's are enabled */
5861 if (rdev->msi_enabled)
5862 ih_cntl |= RPTR_REARM;
5863 WREG32(IH_CNTL, ih_cntl);
5864
5865 /* force the active interrupt state to all disabled */
5866 cik_disable_interrupt_state(rdev);
5867
5868 pci_set_master(rdev->pdev);
5869
5870 /* enable irqs */
5871 cik_enable_interrupts(rdev);
5872
5873 return ret;
5874}
5875
5876/**
5877 * cik_irq_set - enable/disable interrupt sources
5878 *
5879 * @rdev: radeon_device pointer
5880 *
5881 * Enable interrupt sources on the GPU (vblanks, hpd,
5882 * etc.) (CIK).
5883 * Returns 0 for success, errors for failure.
5884 */
5885int cik_irq_set(struct radeon_device *rdev)
5886{
5887 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5888 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04005889 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5890 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05005891 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5892 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5893 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04005894 u32 dma_cntl, dma_cntl1;
Alex Deucher41a524a2013-08-14 01:01:40 -04005895 u32 thermal_int;
Alex Deuchera59781b2012-11-09 10:45:57 -05005896
5897 if (!rdev->irq.installed) {
5898 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5899 return -EINVAL;
5900 }
5901 /* don't enable anything if the ih is disabled */
5902 if (!rdev->ih.enabled) {
5903 cik_disable_interrupts(rdev);
5904 /* force the active interrupt state to all disabled */
5905 cik_disable_interrupt_state(rdev);
5906 return 0;
5907 }
5908
5909 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5910 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5911 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5912 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5913 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5914 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5915
Alex Deucher21a93e12013-04-09 12:47:11 -04005916 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5917 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5918
Alex Deucher2b0781a2013-04-09 14:26:16 -04005919 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5920 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5921 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5922 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5923 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5924 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5925 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5926 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5927
Alex Deuchercc8dbbb2013-08-14 01:03:41 -04005928 if (rdev->flags & RADEON_IS_IGP)
5929 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
5930 ~(THERM_INTH_MASK | THERM_INTL_MASK);
5931 else
5932 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
5933 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
Alex Deucher41a524a2013-08-14 01:01:40 -04005934
Alex Deuchera59781b2012-11-09 10:45:57 -05005935 /* enable CP interrupts on all rings */
5936 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5937 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5938 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5939 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04005940 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5941 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5942		DRM_DEBUG("cik_irq_set: sw int cp1\n");
5943 if (ring->me == 1) {
5944 switch (ring->pipe) {
5945 case 0:
5946 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5947 break;
5948 case 1:
5949 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5950 break;
5951 case 2:
5952 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5953 break;
5954 case 3:
5955				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5956				break;
5957			default:
5958				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5959 break;
5960 }
5961 } else if (ring->me == 2) {
5962 switch (ring->pipe) {
5963 case 0:
5964 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5965 break;
5966 case 1:
5967 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5968 break;
5969 case 2:
5970 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5971 break;
5972 case 3:
5973				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5974				break;
5975			default:
5976				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5977				break;
5978			}
5979		} else {
5980			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5981 }
5982 }
5983 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5984 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5985		DRM_DEBUG("cik_irq_set: sw int cp2\n");
5986 if (ring->me == 1) {
5987 switch (ring->pipe) {
5988 case 0:
5989 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5990 break;
5991 case 1:
5992 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5993 break;
5994 case 2:
5995 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5996 break;
5997 case 3:
5998				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5999				break;
6000			default:
6001				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6002 break;
6003 }
6004 } else if (ring->me == 2) {
6005 switch (ring->pipe) {
6006 case 0:
6007 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6008 break;
6009 case 1:
6010 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6011 break;
6012 case 2:
6013 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6014 break;
6015 case 3:
6016				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6017				break;
6018			default:
6019				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6020				break;
6021			}
6022		} else {
6023			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6024 }
6025 }
Alex Deuchera59781b2012-11-09 10:45:57 -05006026
Alex Deucher21a93e12013-04-09 12:47:11 -04006027 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6028 DRM_DEBUG("cik_irq_set: sw int dma\n");
6029 dma_cntl |= TRAP_ENABLE;
6030 }
6031
6032 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6033 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6034 dma_cntl1 |= TRAP_ENABLE;
6035 }
6036
Alex Deuchera59781b2012-11-09 10:45:57 -05006037 if (rdev->irq.crtc_vblank_int[0] ||
6038 atomic_read(&rdev->irq.pflip[0])) {
6039 DRM_DEBUG("cik_irq_set: vblank 0\n");
6040 crtc1 |= VBLANK_INTERRUPT_MASK;
6041 }
6042 if (rdev->irq.crtc_vblank_int[1] ||
6043 atomic_read(&rdev->irq.pflip[1])) {
6044 DRM_DEBUG("cik_irq_set: vblank 1\n");
6045 crtc2 |= VBLANK_INTERRUPT_MASK;
6046 }
6047 if (rdev->irq.crtc_vblank_int[2] ||
6048 atomic_read(&rdev->irq.pflip[2])) {
6049 DRM_DEBUG("cik_irq_set: vblank 2\n");
6050 crtc3 |= VBLANK_INTERRUPT_MASK;
6051 }
6052 if (rdev->irq.crtc_vblank_int[3] ||
6053 atomic_read(&rdev->irq.pflip[3])) {
6054 DRM_DEBUG("cik_irq_set: vblank 3\n");
6055 crtc4 |= VBLANK_INTERRUPT_MASK;
6056 }
6057 if (rdev->irq.crtc_vblank_int[4] ||
6058 atomic_read(&rdev->irq.pflip[4])) {
6059 DRM_DEBUG("cik_irq_set: vblank 4\n");
6060 crtc5 |= VBLANK_INTERRUPT_MASK;
6061 }
6062 if (rdev->irq.crtc_vblank_int[5] ||
6063 atomic_read(&rdev->irq.pflip[5])) {
6064 DRM_DEBUG("cik_irq_set: vblank 5\n");
6065 crtc6 |= VBLANK_INTERRUPT_MASK;
6066 }
6067 if (rdev->irq.hpd[0]) {
6068 DRM_DEBUG("cik_irq_set: hpd 1\n");
6069 hpd1 |= DC_HPDx_INT_EN;
6070 }
6071 if (rdev->irq.hpd[1]) {
6072 DRM_DEBUG("cik_irq_set: hpd 2\n");
6073 hpd2 |= DC_HPDx_INT_EN;
6074 }
6075 if (rdev->irq.hpd[2]) {
6076 DRM_DEBUG("cik_irq_set: hpd 3\n");
6077 hpd3 |= DC_HPDx_INT_EN;
6078 }
6079 if (rdev->irq.hpd[3]) {
6080 DRM_DEBUG("cik_irq_set: hpd 4\n");
6081 hpd4 |= DC_HPDx_INT_EN;
6082 }
6083 if (rdev->irq.hpd[4]) {
6084 DRM_DEBUG("cik_irq_set: hpd 5\n");
6085 hpd5 |= DC_HPDx_INT_EN;
6086 }
6087 if (rdev->irq.hpd[5]) {
6088 DRM_DEBUG("cik_irq_set: hpd 6\n");
6089 hpd6 |= DC_HPDx_INT_EN;
6090 }
6091
Alex Deucher41a524a2013-08-14 01:01:40 -04006092 if (rdev->irq.dpm_thermal) {
6093 DRM_DEBUG("dpm thermal\n");
Alex Deuchercc8dbbb2013-08-14 01:03:41 -04006094 if (rdev->flags & RADEON_IS_IGP)
6095 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6096 else
6097 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
Alex Deucher41a524a2013-08-14 01:01:40 -04006098 }
6099
Alex Deuchera59781b2012-11-09 10:45:57 -05006100 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6101
Alex Deucher21a93e12013-04-09 12:47:11 -04006102 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6103 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6104
Alex Deucher2b0781a2013-04-09 14:26:16 -04006105 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6106 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6107 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6108 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6109 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6110 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6111 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6112 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6113
Alex Deuchera59781b2012-11-09 10:45:57 -05006114 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6115
6116 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6117 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6118 if (rdev->num_crtc >= 4) {
6119 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6120 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6121 }
6122 if (rdev->num_crtc >= 6) {
6123 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6124 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6125 }
6126
6127 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6128 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6129 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6130 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6131 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6132 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6133
Alex Deuchercc8dbbb2013-08-14 01:03:41 -04006134 if (rdev->flags & RADEON_IS_IGP)
6135 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6136 else
6137 WREG32_SMC(CG_THERMAL_INT, thermal_int);
Alex Deucher41a524a2013-08-14 01:01:40 -04006138
Alex Deuchera59781b2012-11-09 10:45:57 -05006139 return 0;
6140}
6141
6142/**
6143 * cik_irq_ack - ack interrupt sources
6144 *
6145 * @rdev: radeon_device pointer
6146 *
6147 * Ack interrupt sources on the GPU (vblanks, hpd,
6148 * etc.) (CIK). Certain interrupt sources are sw
6149 * generated and do not require an explicit ack.
6150 */
6151static inline void cik_irq_ack(struct radeon_device *rdev)
6152{
6153 u32 tmp;
6154
6155 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6156 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6157 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6158 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6159 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6160 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6161 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6162
6163 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6164 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6165 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6166 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6167 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6168 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6169 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6170 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6171
6172 if (rdev->num_crtc >= 4) {
6173 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6174 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6175 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6176 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6177 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6178 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6179 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6180 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6181 }
6182
6183 if (rdev->num_crtc >= 6) {
6184 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6185 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6186 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6187 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6188 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6189 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6190 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6191 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6192 }
6193
6194 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6195 tmp = RREG32(DC_HPD1_INT_CONTROL);
6196 tmp |= DC_HPDx_INT_ACK;
6197 WREG32(DC_HPD1_INT_CONTROL, tmp);
6198 }
6199 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6200 tmp = RREG32(DC_HPD2_INT_CONTROL);
6201 tmp |= DC_HPDx_INT_ACK;
6202 WREG32(DC_HPD2_INT_CONTROL, tmp);
6203 }
6204 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6205 tmp = RREG32(DC_HPD3_INT_CONTROL);
6206 tmp |= DC_HPDx_INT_ACK;
6207 WREG32(DC_HPD3_INT_CONTROL, tmp);
6208 }
6209 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6210 tmp = RREG32(DC_HPD4_INT_CONTROL);
6211 tmp |= DC_HPDx_INT_ACK;
6212 WREG32(DC_HPD4_INT_CONTROL, tmp);
6213 }
6214 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6215 tmp = RREG32(DC_HPD5_INT_CONTROL);
6216 tmp |= DC_HPDx_INT_ACK;
6217 WREG32(DC_HPD5_INT_CONTROL, tmp);
6218 }
6219 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6220		tmp = RREG32(DC_HPD6_INT_CONTROL);
6221 tmp |= DC_HPDx_INT_ACK;
6222 WREG32(DC_HPD6_INT_CONTROL, tmp);
6223 }
6224}
6225
6226/**
6227 * cik_irq_disable - disable interrupts
6228 *
6229 * @rdev: radeon_device pointer
6230 *
6231 * Disable interrupts on the hw (CIK).
6232 */
6233static void cik_irq_disable(struct radeon_device *rdev)
6234{
6235 cik_disable_interrupts(rdev);
6236 /* Wait and acknowledge irq */
6237 mdelay(1);
6238 cik_irq_ack(rdev);
6239 cik_disable_interrupt_state(rdev);
6240}
6241
6242/**
6243 * cik_irq_suspend - disable interrupts for suspend
6244 *
6245 * @rdev: radeon_device pointer
6246 *
6247 * Disable interrupts and stop the RLC (CIK).
6248 * Used for suspend.
6249 */
6250static void cik_irq_suspend(struct radeon_device *rdev)
6251{
6252 cik_irq_disable(rdev);
6253 cik_rlc_stop(rdev);
6254}
6255
6256/**
6257 * cik_irq_fini - tear down interrupt support
6258 *
6259 * @rdev: radeon_device pointer
6260 *
6261 * Disable interrupts on the hw and free the IH ring
6262 * buffer (CIK).
6263 * Used for driver unload.
6264 */
6265static void cik_irq_fini(struct radeon_device *rdev)
6266{
6267 cik_irq_suspend(rdev);
6268 r600_ih_ring_fini(rdev);
6269}
6270
6271/**
6272 * cik_get_ih_wptr - get the IH ring buffer wptr
6273 *
6274 * @rdev: radeon_device pointer
6275 *
6276 * Get the IH ring buffer wptr from either the register
6277 * or the writeback memory buffer (CIK). Also check for
6278 * ring buffer overflow and deal with it.
6279 * Used by cik_irq_process().
6280 * Returns the value of the wptr.
6281 */
6282static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6283{
6284 u32 wptr, tmp;
6285
6286 if (rdev->wb.enabled)
6287 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6288 else
6289 wptr = RREG32(IH_RB_WPTR);
6290
6291 if (wptr & RB_OVERFLOW) {
6292		/* When a ring buffer overflow happens, start parsing interrupts
6293		 * from the last not-overwritten vector (wptr + 16). Hopefully
6294		 * this should allow us to catch up.
6295 */
6296 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6297			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6298 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6299 tmp = RREG32(IH_RB_CNTL);
6300 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6301 WREG32(IH_RB_CNTL, tmp);
6302 }
6303 return (wptr & rdev->ih.ptr_mask);
6304}
6305
6306/* CIK IV Ring
6307 * Each IV ring entry is 128 bits:
6308 * [7:0] - interrupt source id
6309 * [31:8] - reserved
6310 * [59:32] - interrupt source data
6311 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04006312 * [71:64] - RINGID
6313 * CP:
6314 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05006315 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6316 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6317 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6318 * PIPE_ID - ME0 0=3D
6319 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04006320 * SDMA:
6321 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
6322 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
6323 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05006324 * [79:72] - VMID
6325 * [95:80] - PASID
6326 * [127:96] - reserved
6327 */
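/* Worked example, assuming the layout above: a CP source with ring_id
 * 0x28 (0b0101000) decodes as me_id = (0x28 & 0x60) >> 5 = 1,
 * pipe_id = (0x28 & 0x18) >> 3 = 1, queue_id = 0x28 & 0x7 = 0,
 * i.e. the first compute MEC, pipe 1, queue 0 - the same masks the
 * handlers below apply to ring_id.
 */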
6328/**
6329 * cik_irq_process - interrupt handler
6330 *
6331 * @rdev: radeon_device pointer
6332 *
6333 * Interrupt handler (CIK). Walk the IH ring,
6334 * ack interrupts and schedule work to handle
6335 * interrupt events.
6336 * Returns irq process return code.
6337 */
6338int cik_irq_process(struct radeon_device *rdev)
6339{
Alex Deucher2b0781a2013-04-09 14:26:16 -04006340 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6341 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05006342 u32 wptr;
6343 u32 rptr;
6344 u32 src_id, src_data, ring_id;
6345 u8 me_id, pipe_id, queue_id;
6346 u32 ring_index;
6347 bool queue_hotplug = false;
6348 bool queue_reset = false;
Alex Deucher3ec7d112013-06-14 10:42:22 -04006349 u32 addr, status, mc_client;
Alex Deucher41a524a2013-08-14 01:01:40 -04006350 bool queue_thermal = false;
Alex Deuchera59781b2012-11-09 10:45:57 -05006351
6352 if (!rdev->ih.enabled || rdev->shutdown)
6353 return IRQ_NONE;
6354
6355 wptr = cik_get_ih_wptr(rdev);
6356
6357restart_ih:
6358 /* is somebody else already processing irqs? */
6359 if (atomic_xchg(&rdev->ih.lock, 1))
6360 return IRQ_NONE;
6361
6362 rptr = rdev->ih.rptr;
6363 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6364
6365 /* Order reading of wptr vs. reading of IH ring data */
6366 rmb();
6367
6368 /* display interrupts */
6369 cik_irq_ack(rdev);
6370
6371 while (rptr != wptr) {
6372 /* wptr/rptr are in bytes! */
6373 ring_index = rptr / 4;
6374 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6375 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6376 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05006377
6378 switch (src_id) {
6379 case 1: /* D1 vblank/vline */
6380 switch (src_data) {
6381 case 0: /* D1 vblank */
6382 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6383 if (rdev->irq.crtc_vblank_int[0]) {
6384 drm_handle_vblank(rdev->ddev, 0);
6385 rdev->pm.vblank_sync = true;
6386 wake_up(&rdev->irq.vblank_queue);
6387 }
6388 if (atomic_read(&rdev->irq.pflip[0]))
6389 radeon_crtc_handle_flip(rdev, 0);
6390 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6391 DRM_DEBUG("IH: D1 vblank\n");
6392 }
6393 break;
6394 case 1: /* D1 vline */
6395 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6396 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6397 DRM_DEBUG("IH: D1 vline\n");
6398 }
6399 break;
6400 default:
6401 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6402 break;
6403 }
6404 break;
6405 case 2: /* D2 vblank/vline */
6406 switch (src_data) {
6407 case 0: /* D2 vblank */
6408 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6409 if (rdev->irq.crtc_vblank_int[1]) {
6410 drm_handle_vblank(rdev->ddev, 1);
6411 rdev->pm.vblank_sync = true;
6412 wake_up(&rdev->irq.vblank_queue);
6413 }
6414 if (atomic_read(&rdev->irq.pflip[1]))
6415 radeon_crtc_handle_flip(rdev, 1);
6416 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6417 DRM_DEBUG("IH: D2 vblank\n");
6418 }
6419 break;
6420 case 1: /* D2 vline */
6421 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6422 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6423 DRM_DEBUG("IH: D2 vline\n");
6424 }
6425 break;
6426 default:
6427 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6428 break;
6429 }
6430 break;
6431 case 3: /* D3 vblank/vline */
6432 switch (src_data) {
6433 case 0: /* D3 vblank */
6434 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6435 if (rdev->irq.crtc_vblank_int[2]) {
6436 drm_handle_vblank(rdev->ddev, 2);
6437 rdev->pm.vblank_sync = true;
6438 wake_up(&rdev->irq.vblank_queue);
6439 }
6440 if (atomic_read(&rdev->irq.pflip[2]))
6441 radeon_crtc_handle_flip(rdev, 2);
6442 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6443 DRM_DEBUG("IH: D3 vblank\n");
6444 }
6445 break;
6446 case 1: /* D3 vline */
6447 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6448 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6449 DRM_DEBUG("IH: D3 vline\n");
6450 }
6451 break;
6452 default:
6453 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6454 break;
6455 }
6456 break;
6457 case 4: /* D4 vblank/vline */
6458 switch (src_data) {
6459 case 0: /* D4 vblank */
6460 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6461 if (rdev->irq.crtc_vblank_int[3]) {
6462 drm_handle_vblank(rdev->ddev, 3);
6463 rdev->pm.vblank_sync = true;
6464 wake_up(&rdev->irq.vblank_queue);
6465 }
6466 if (atomic_read(&rdev->irq.pflip[3]))
6467 radeon_crtc_handle_flip(rdev, 3);
6468 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6469 DRM_DEBUG("IH: D4 vblank\n");
6470 }
6471 break;
6472 case 1: /* D4 vline */
6473 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6474 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6475 DRM_DEBUG("IH: D4 vline\n");
6476 }
6477 break;
6478 default:
6479 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6480 break;
6481 }
6482 break;
6483 case 5: /* D5 vblank/vline */
6484 switch (src_data) {
6485 case 0: /* D5 vblank */
6486 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6487 if (rdev->irq.crtc_vblank_int[4]) {
6488 drm_handle_vblank(rdev->ddev, 4);
6489 rdev->pm.vblank_sync = true;
6490 wake_up(&rdev->irq.vblank_queue);
6491 }
6492 if (atomic_read(&rdev->irq.pflip[4]))
6493 radeon_crtc_handle_flip(rdev, 4);
6494 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6495 DRM_DEBUG("IH: D5 vblank\n");
6496 }
6497 break;
6498 case 1: /* D5 vline */
6499 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6500 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6501 DRM_DEBUG("IH: D5 vline\n");
6502 }
6503 break;
6504 default:
6505 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6506 break;
6507 }
6508 break;
6509 case 6: /* D6 vblank/vline */
6510 switch (src_data) {
6511 case 0: /* D6 vblank */
6512 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6513 if (rdev->irq.crtc_vblank_int[5]) {
6514 drm_handle_vblank(rdev->ddev, 5);
6515 rdev->pm.vblank_sync = true;
6516 wake_up(&rdev->irq.vblank_queue);
6517 }
6518 if (atomic_read(&rdev->irq.pflip[5]))
6519 radeon_crtc_handle_flip(rdev, 5);
6520 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6521 DRM_DEBUG("IH: D6 vblank\n");
6522 }
6523 break;
6524 case 1: /* D6 vline */
6525 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6526 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6527 DRM_DEBUG("IH: D6 vline\n");
6528 }
6529 break;
6530 default:
6531 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6532 break;
6533 }
6534 break;
6535 case 42: /* HPD hotplug */
6536 switch (src_data) {
6537 case 0:
6538 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6539 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6540 queue_hotplug = true;
6541 DRM_DEBUG("IH: HPD1\n");
6542 }
6543 break;
6544 case 1:
6545 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6546 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6547 queue_hotplug = true;
6548 DRM_DEBUG("IH: HPD2\n");
6549 }
6550 break;
6551 case 2:
6552 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6553 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6554 queue_hotplug = true;
6555 DRM_DEBUG("IH: HPD3\n");
6556 }
6557 break;
6558 case 3:
6559 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6560 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6561 queue_hotplug = true;
6562 DRM_DEBUG("IH: HPD4\n");
6563 }
6564 break;
6565 case 4:
6566 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6567 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6568 queue_hotplug = true;
6569 DRM_DEBUG("IH: HPD5\n");
6570 }
6571 break;
6572 case 5:
6573 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6574 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6575 queue_hotplug = true;
6576 DRM_DEBUG("IH: HPD6\n");
6577 }
6578 break;
6579 default:
6580 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6581 break;
6582 }
6583 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04006584 case 146:
6585 case 147:
Alex Deucher3ec7d112013-06-14 10:42:22 -04006586 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6587 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6588 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
Alex Deucher9d97c992012-09-06 14:24:48 -04006589 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6590 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04006591 addr);
Alex Deucher9d97c992012-09-06 14:24:48 -04006592 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04006593 status);
6594 cik_vm_decode_fault(rdev, status, addr, mc_client);
Alex Deucher9d97c992012-09-06 14:24:48 -04006595 /* reset addr and status */
6596 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6597 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05006598 case 176: /* GFX RB CP_INT */
6599 case 177: /* GFX IB CP_INT */
6600 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6601 break;
6602 case 181: /* CP EOP event */
6603 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04006604 /* XXX check the bitfield order! */
6605 me_id = (ring_id & 0x60) >> 5;
6606 pipe_id = (ring_id & 0x18) >> 3;
6607 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05006608 switch (me_id) {
6609 case 0:
6610 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6611 break;
6612 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05006613 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04006614			if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
6615				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6616			if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
6617 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05006618 break;
6619 }
6620 break;
6621 case 184: /* CP Privileged reg access */
6622 DRM_ERROR("Illegal register access in command stream\n");
6623 /* XXX check the bitfield order! */
6624 me_id = (ring_id & 0x60) >> 5;
6625 pipe_id = (ring_id & 0x18) >> 3;
6626 queue_id = (ring_id & 0x7) >> 0;
6627 switch (me_id) {
6628 case 0:
6629 /* This results in a full GPU reset, but all we need to do is soft
6630 * reset the CP for gfx
6631 */
6632 queue_reset = true;
6633 break;
6634 case 1:
6635 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04006636 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05006637 break;
6638 case 2:
6639 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04006640 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05006641 break;
6642 }
6643 break;
6644 case 185: /* CP Privileged inst */
6645 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04006646 /* XXX check the bitfield order! */
6647 me_id = (ring_id & 0x60) >> 5;
6648 pipe_id = (ring_id & 0x18) >> 3;
6649 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05006650 switch (me_id) {
6651 case 0:
6652 /* This results in a full GPU reset, but all we need to do is soft
6653 * reset the CP for gfx
6654 */
6655 queue_reset = true;
6656 break;
6657 case 1:
6658 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04006659 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05006660 break;
6661 case 2:
6662 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04006663 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05006664 break;
6665 }
6666 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04006667 case 224: /* SDMA trap event */
6668 /* XXX check the bitfield order! */
6669 me_id = (ring_id & 0x3) >> 0;
6670 queue_id = (ring_id & 0xc) >> 2;
6671 DRM_DEBUG("IH: SDMA trap\n");
6672 switch (me_id) {
6673 case 0:
6674 switch (queue_id) {
6675 case 0:
6676 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6677 break;
6678 case 1:
6679 /* XXX compute */
6680 break;
6681 case 2:
6682 /* XXX compute */
6683 break;
6684 }
6685 break;
6686 case 1:
6687 switch (queue_id) {
6688 case 0:
6689 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6690 break;
6691 case 1:
6692 /* XXX compute */
6693 break;
6694 case 2:
6695 /* XXX compute */
6696 break;
6697 }
6698 break;
6699 }
6700 break;
Alex Deucher41a524a2013-08-14 01:01:40 -04006701 case 230: /* thermal low to high */
6702 DRM_DEBUG("IH: thermal low to high\n");
6703 rdev->pm.dpm.thermal.high_to_low = false;
6704 queue_thermal = true;
6705 break;
6706 case 231: /* thermal high to low */
6707 DRM_DEBUG("IH: thermal high to low\n");
6708 rdev->pm.dpm.thermal.high_to_low = true;
6709 queue_thermal = true;
6710 break;
6711 case 233: /* GUI IDLE */
6712 DRM_DEBUG("IH: GUI idle\n");
6713 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04006714 case 241: /* SDMA Privileged inst */
6715 case 247: /* SDMA Privileged inst */
6716 DRM_ERROR("Illegal instruction in SDMA command stream\n");
6717 /* XXX check the bitfield order! */
6718 me_id = (ring_id & 0x3) >> 0;
6719 queue_id = (ring_id & 0xc) >> 2;
6720 switch (me_id) {
6721 case 0:
6722 switch (queue_id) {
6723 case 0:
6724 queue_reset = true;
6725 break;
6726 case 1:
6727 /* XXX compute */
6728 queue_reset = true;
6729 break;
6730 case 2:
6731 /* XXX compute */
6732 queue_reset = true;
6733 break;
6734 }
6735 break;
6736 case 1:
6737 switch (queue_id) {
6738 case 0:
6739 queue_reset = true;
6740 break;
6741 case 1:
6742 /* XXX compute */
6743 queue_reset = true;
6744 break;
6745 case 2:
6746 /* XXX compute */
6747 queue_reset = true;
6748 break;
6749 }
6750 break;
6751 }
6752 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05006753 default:
6754 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6755 break;
6756 }
6757
6758 /* wptr/rptr are in bytes! */
6759 rptr += 16;
6760 rptr &= rdev->ih.ptr_mask;
6761 }
6762 if (queue_hotplug)
6763 schedule_work(&rdev->hotplug_work);
6764 if (queue_reset)
6765 schedule_work(&rdev->reset_work);
Alex Deucher41a524a2013-08-14 01:01:40 -04006766 if (queue_thermal)
6767 schedule_work(&rdev->pm.dpm.thermal.work);
Alex Deuchera59781b2012-11-09 10:45:57 -05006768 rdev->ih.rptr = rptr;
6769 WREG32(IH_RB_RPTR, rdev->ih.rptr);
6770 atomic_set(&rdev->ih.lock, 0);
6771
6772 /* make sure wptr hasn't changed while processing */
6773 wptr = cik_get_ih_wptr(rdev);
6774 if (wptr != rptr)
6775 goto restart_ih;
6776
6777 return IRQ_HANDLED;
6778}
Alex Deucher7bf94a22012-08-17 11:48:29 -04006779
6780/*
6781 * startup/shutdown callbacks
6782 */
6783/**
6784 * cik_startup - program the asic to a functional state
6785 *
6786 * @rdev: radeon_device pointer
6787 *
6788 * Programs the asic to a functional state (CIK).
6789 * Called by cik_init() and cik_resume().
6790 * Returns 0 for success, error for failure.
6791 */
6792static int cik_startup(struct radeon_device *rdev)
6793{
6794 struct radeon_ring *ring;
6795 int r;
6796
Alex Deucher8a7cd272013-08-06 11:29:39 -04006797 /* enable pcie gen2/3 link */
6798 cik_pcie_gen3_enable(rdev);
Alex Deucher7235711a42013-04-04 13:58:09 -04006799 /* enable aspm */
6800 cik_program_aspm(rdev);
Alex Deucher8a7cd272013-08-06 11:29:39 -04006801
Alex Deucher6fab3feb2013-08-04 12:13:17 -04006802 cik_mc_program(rdev);
6803
Alex Deucher7bf94a22012-08-17 11:48:29 -04006804 if (rdev->flags & RADEON_IS_IGP) {
6805 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6806 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6807 r = cik_init_microcode(rdev);
6808 if (r) {
6809 DRM_ERROR("Failed to load firmware!\n");
6810 return r;
6811 }
6812 }
6813 } else {
6814 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6815 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
6816 !rdev->mc_fw) {
6817 r = cik_init_microcode(rdev);
6818 if (r) {
6819 DRM_ERROR("Failed to load firmware!\n");
6820 return r;
6821 }
6822 }
6823
6824 r = ci_mc_load_microcode(rdev);
6825 if (r) {
6826 DRM_ERROR("Failed to load MC firmware!\n");
6827 return r;
6828 }
6829 }
6830
6831 r = r600_vram_scratch_init(rdev);
6832 if (r)
6833 return r;
6834
Alex Deucher7bf94a22012-08-17 11:48:29 -04006835 r = cik_pcie_gart_enable(rdev);
6836 if (r)
6837 return r;
6838 cik_gpu_init(rdev);
6839
6840 /* allocate rlc buffers */
Alex Deucher22c775c2013-07-23 09:41:05 -04006841 if (rdev->flags & RADEON_IS_IGP) {
6842 if (rdev->family == CHIP_KAVERI) {
6843 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
6844 rdev->rlc.reg_list_size =
6845 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
6846 } else {
6847 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
6848 rdev->rlc.reg_list_size =
6849 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
6850 }
6851 }
6852 rdev->rlc.cs_data = ci_cs_data;
6853 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
Alex Deucher1fd11772013-04-17 17:53:50 -04006854 r = sumo_rlc_init(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006855 if (r) {
6856 DRM_ERROR("Failed to init rlc BOs!\n");
6857 return r;
6858 }
6859
6860 /* allocate wb buffer */
6861 r = radeon_wb_init(rdev);
6862 if (r)
6863 return r;
6864
Alex Deucher963e81f2013-06-26 17:37:11 -04006865 /* allocate mec buffers */
6866 r = cik_mec_init(rdev);
6867 if (r) {
6868 DRM_ERROR("Failed to init MEC BOs!\n");
6869 return r;
6870 }
6871
Alex Deucher7bf94a22012-08-17 11:48:29 -04006872 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6873 if (r) {
6874 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6875 return r;
6876 }
6877
Alex Deucher963e81f2013-06-26 17:37:11 -04006878 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6879 if (r) {
6880 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6881 return r;
6882 }
6883
6884 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6885 if (r) {
6886 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6887 return r;
6888 }
6889
Alex Deucher7bf94a22012-08-17 11:48:29 -04006890 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6891 if (r) {
6892 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6893 return r;
6894 }
6895
6896 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6897 if (r) {
6898 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6899 return r;
6900 }
6901
Christian Könige409b122013-08-13 11:56:53 +02006902 r = uvd_v4_2_resume(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006903 if (!r) {
6904 r = radeon_fence_driver_start_ring(rdev,
6905 R600_RING_TYPE_UVD_INDEX);
6906 if (r)
6907 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6908 }
6909 if (r)
6910 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6911
Alex Deucher7bf94a22012-08-17 11:48:29 -04006912 /* Enable IRQ */
6913 if (!rdev->irq.installed) {
6914 r = radeon_irq_kms_init(rdev);
6915 if (r)
6916 return r;
6917 }
6918
6919 r = cik_irq_init(rdev);
6920 if (r) {
6921 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6922 radeon_irq_kms_fini(rdev);
6923 return r;
6924 }
6925 cik_irq_set(rdev);
6926
6927 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6928 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6929 CP_RB0_RPTR, CP_RB0_WPTR,
Christian König2e1e6da2013-08-13 11:56:52 +02006930 RADEON_CP_PACKET2);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006931 if (r)
6932 return r;
6933
Alex Deucher963e81f2013-06-26 17:37:11 -04006934 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04006935 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006936 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6937 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6938 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Christian König2e1e6da2013-08-13 11:56:52 +02006939 PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006940 if (r)
6941 return r;
6942 ring->me = 1; /* first MEC */
6943 ring->pipe = 0; /* first pipe */
6944 ring->queue = 0; /* first queue */
6945 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6946
Alex Deucher2615b532013-06-03 11:21:58 -04006947 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006948 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6949 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6950 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Christian König2e1e6da2013-08-13 11:56:52 +02006951 PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006952 if (r)
6953 return r;
6954 /* dGPUs only have 1 MEC */
6955 ring->me = 1; /* first MEC */
6956 ring->pipe = 0; /* first pipe */
6957 ring->queue = 1; /* second queue */
6958 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6959
Alex Deucher7bf94a22012-08-17 11:48:29 -04006960 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6961 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6962 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6963 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
Christian König2e1e6da2013-08-13 11:56:52 +02006964 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
Alex Deucher7bf94a22012-08-17 11:48:29 -04006965 if (r)
6966 return r;
6967
6968 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6969 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6970 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6971 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
Christian König2e1e6da2013-08-13 11:56:52 +02006972 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
Alex Deucher7bf94a22012-08-17 11:48:29 -04006973 if (r)
6974 return r;
6975
6976 r = cik_cp_resume(rdev);
6977 if (r)
6978 return r;
6979
6980 r = cik_sdma_resume(rdev);
6981 if (r)
6982 return r;
6983
Christian König87167bb2013-04-09 13:39:21 -04006984 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6985 if (ring->ring_size) {
Christian König02c9f7f2013-08-13 11:56:51 +02006986 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
Christian König87167bb2013-04-09 13:39:21 -04006987 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
Christian König2e1e6da2013-08-13 11:56:52 +02006988 RADEON_CP_PACKET2);
Christian König87167bb2013-04-09 13:39:21 -04006989 if (!r)
Christian Könige409b122013-08-13 11:56:53 +02006990 r = uvd_v1_0_init(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006991 if (r)
6992 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6993 }
6994
Alex Deucher7bf94a22012-08-17 11:48:29 -04006995 r = radeon_ib_pool_init(rdev);
6996 if (r) {
6997 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6998 return r;
6999 }
7000
7001 r = radeon_vm_manager_init(rdev);
7002 if (r) {
7003 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7004 return r;
7005 }
7006
Alex Deucherb5306022013-07-31 16:51:33 -04007007 r = dce6_audio_init(rdev);
7008 if (r)
7009 return r;
7010
Alex Deucher7bf94a22012-08-17 11:48:29 -04007011 return 0;
7012}
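
/*
 * Reading guide for cik_startup() above (a summary, not driver code):
 * RLC init -> writeback + MEC buffers -> fence setup for the GFX,
 * CP1/CP2 compute, both SDMA and UVD rings -> UVD resume -> IRQ init
 * -> ring init (type-3 NOPs for the compute queues, since type-2 is
 * deprecated on MEC) -> CP/SDMA resume -> UVD init -> IB pool ->
 * VM manager -> audio. Every step bails out early on error, so a 0
 * return means the whole chain came up.
 */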
7013
7014/**
7015 * cik_resume - resume the asic to a functional state
7016 *
7017 * @rdev: radeon_device pointer
7018 *
7019 * Programs the asic to a functional state (CIK).
7020 * Called at resume.
7021 * Returns 0 for success, error for failure.
7022 */
7023int cik_resume(struct radeon_device *rdev)
7024{
7025 int r;
7026
7027 /* post card */
7028 atom_asic_init(rdev->mode_info.atom_context);
7029
Alex Deucher0aafd312013-04-09 14:43:30 -04007030 /* init golden registers */
7031 cik_init_golden_registers(rdev);
7032
Alex Deucher7bf94a22012-08-17 11:48:29 -04007033 rdev->accel_working = true;
7034 r = cik_startup(rdev);
7035 if (r) {
7036 DRM_ERROR("cik startup failed on resume\n");
7037 rdev->accel_working = false;
7038 return r;
7039 }
7040
7041 return r;
7042
7043}
7044
7045/**
7046 * cik_suspend - suspend the asic
7047 *
7048 * @rdev: radeon_device pointer
7049 *
7050 * Bring the chip into a state suitable for suspend (CIK).
7051 * Called at suspend.
7052 * Returns 0 for success.
7053 */
7054int cik_suspend(struct radeon_device *rdev)
7055{
Alex Deucherb5306022013-07-31 16:51:33 -04007056 dce6_audio_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007057 radeon_vm_manager_fini(rdev);
7058 cik_cp_enable(rdev, false);
7059 cik_sdma_enable(rdev, false);
Christian Könige409b122013-08-13 11:56:53 +02007060 uvd_v1_0_fini(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007061 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007062 cik_irq_suspend(rdev);
7063 radeon_wb_disable(rdev);
7064 cik_pcie_gart_disable(rdev);
7065 return 0;
7066}
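
/*
 * Note that the suspend path above is roughly cik_startup() in
 * reverse: audio and the VM manager go down first, the CP/SDMA
 * engines and UVD are halted next, and IRQs, writeback and the GART
 * are torn down last.
 */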
7067
7068 /* The plan is to move initialization into this function and
7069 * use helper functions so that radeon_device_init does
7070 * little more than call asic specific functions. This
7071 * should also allow us to remove a bunch of callback
7072 * functions like vram_info.
7073 */
7074/**
7075 * cik_init - asic specific driver and hw init
7076 *
7077 * @rdev: radeon_device pointer
7078 *
7079 * Setup asic specific driver variables and program the hw
7080 * to a functional state (CIK).
7081 * Called at driver startup.
7082 * Returns 0 for success, errors for failure.
7083 */
7084int cik_init(struct radeon_device *rdev)
7085{
7086 struct radeon_ring *ring;
7087 int r;
7088
7089 /* Read BIOS */
7090 if (!radeon_get_bios(rdev)) {
7091 if (ASIC_IS_AVIVO(rdev))
7092 return -EINVAL;
7093 }
7094 /* Must be an ATOMBIOS */
7095 if (!rdev->is_atom_bios) {
7096 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7097 return -EINVAL;
7098 }
7099 r = radeon_atombios_init(rdev);
7100 if (r)
7101 return r;
7102
7103 /* Post card if necessary */
7104 if (!radeon_card_posted(rdev)) {
7105 if (!rdev->bios) {
7106 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7107 return -EINVAL;
7108 }
7109 DRM_INFO("GPU not posted. posting now...\n");
7110 atom_asic_init(rdev->mode_info.atom_context);
7111 }
Alex Deucher0aafd312013-04-09 14:43:30 -04007112 /* init golden registers */
7113 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007114 /* Initialize scratch registers */
7115 cik_scratch_init(rdev);
7116 /* Initialize surface registers */
7117 radeon_surface_init(rdev);
7118 /* Initialize clocks */
7119 radeon_get_clock_info(rdev->ddev);
7120
7121 /* Fence driver */
7122 r = radeon_fence_driver_init(rdev);
7123 if (r)
7124 return r;
7125
7126 /* initialize memory controller */
7127 r = cik_mc_init(rdev);
7128 if (r)
7129 return r;
7130 /* Memory manager */
7131 r = radeon_bo_init(rdev);
7132 if (r)
7133 return r;
7134
7135 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7136 ring->ring_obj = NULL;
7137 r600_ring_init(rdev, ring, 1024 * 1024);
7138
Alex Deucher963e81f2013-06-26 17:37:11 -04007139 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7140 ring->ring_obj = NULL;
7141 r600_ring_init(rdev, ring, 1024 * 1024);
7142 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7143 if (r)
7144 return r;
7145
7146 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7147 ring->ring_obj = NULL;
7148 r600_ring_init(rdev, ring, 1024 * 1024);
7149 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7150 if (r)
7151 return r;
7152
Alex Deucher7bf94a22012-08-17 11:48:29 -04007153 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7154 ring->ring_obj = NULL;
7155 r600_ring_init(rdev, ring, 256 * 1024);
7156
7157 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7158 ring->ring_obj = NULL;
7159 r600_ring_init(rdev, ring, 256 * 1024);
7160
Christian König87167bb2013-04-09 13:39:21 -04007161 r = radeon_uvd_init(rdev);
7162 if (!r) {
7163 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7164 ring->ring_obj = NULL;
7165 r600_ring_init(rdev, ring, 4096);
7166 }
7167
Alex Deucher7bf94a22012-08-17 11:48:29 -04007168 rdev->ih.ring_obj = NULL;
7169 r600_ih_ring_init(rdev, 64 * 1024);
7170
7171 r = r600_pcie_gart_init(rdev);
7172 if (r)
7173 return r;
7174
7175 rdev->accel_working = true;
7176 r = cik_startup(rdev);
7177 if (r) {
7178 dev_err(rdev->dev, "disabling GPU acceleration\n");
7179 cik_cp_fini(rdev);
7180 cik_sdma_fini(rdev);
7181 cik_irq_fini(rdev);
Alex Deucher1fd11772013-04-17 17:53:50 -04007182 sumo_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04007183 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007184 radeon_wb_fini(rdev);
7185 radeon_ib_pool_fini(rdev);
7186 radeon_vm_manager_fini(rdev);
7187 radeon_irq_kms_fini(rdev);
7188 cik_pcie_gart_fini(rdev);
7189 rdev->accel_working = false;
7190 }
7191
7192 /* Don't start up if the MC ucode is missing.
7193 * The default clocks and voltages before the MC ucode
7194 * is loaded are not sufficient for advanced operations.
7195 */
7196 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7197 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7198 return -EINVAL;
7199 }
7200
7201 return 0;
7202}
7203
7204/**
7205 * cik_fini - asic specific driver and hw fini
7206 *
7207 * @rdev: radeon_device pointer
7208 *
7209 * Tear down the asic specific driver variables and program the hw
7210 * to an idle state (CIK).
7211 * Called at driver unload.
7212 */
7213void cik_fini(struct radeon_device *rdev)
7214{
7215 cik_cp_fini(rdev);
7216 cik_sdma_fini(rdev);
7217 cik_irq_fini(rdev);
Alex Deucher1fd11772013-04-17 17:53:50 -04007218 sumo_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04007219 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007220 radeon_wb_fini(rdev);
7221 radeon_vm_manager_fini(rdev);
7222 radeon_ib_pool_fini(rdev);
7223 radeon_irq_kms_fini(rdev);
Christian Könige409b122013-08-13 11:56:53 +02007224 uvd_v1_0_fini(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007225 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007226 cik_pcie_gart_fini(rdev);
7227 r600_vram_scratch_fini(rdev);
7228 radeon_gem_fini(rdev);
7229 radeon_fence_driver_fini(rdev);
7230 radeon_bo_fini(rdev);
7231 radeon_atombios_fini(rdev);
7232 kfree(rdev->bios);
7233 rdev->bios = NULL;
7234}
Alex Deuchercd84a272012-07-20 17:13:13 -04007235
7236/* display watermark setup */
7237/**
7238 * dce8_line_buffer_adjust - Set up the line buffer
7239 *
7240 * @rdev: radeon_device pointer
7241 * @radeon_crtc: the selected display controller
7242 * @mode: the current display mode on the selected display
7243 * controller
7244 *
7245 * Setup up the line buffer allocation for
7246 * the selected display controller (CIK).
7247 * Returns the line buffer size in pixels.
7248 */
7249static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7250 struct radeon_crtc *radeon_crtc,
7251 struct drm_display_mode *mode)
7252{
7253 u32 tmp;
7254
7255 /*
7256 * Line Buffer Setup
7257 * There are 6 line buffers, one for each display controller.
7258 * There are 3 partitions per LB. Select the number of partitions
7259 * to enable based on the display width. For display widths larger
7260 * than 4096, you need to use 2 display controllers and combine
7261 * them using the stereo blender.
7262 */
7263 if (radeon_crtc->base.enabled && mode) {
7264 if (mode->crtc_hdisplay < 1920)
7265 tmp = 1;
7266 else if (mode->crtc_hdisplay < 2560)
7267 tmp = 2;
7268 else if (mode->crtc_hdisplay < 4096)
7269 tmp = 0;
7270 else {
7271 DRM_DEBUG_KMS("Mode too big for LB!\n");
7272 tmp = 0;
7273 }
7274 } else
7275 tmp = 1;
7276
7277 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7278 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7279
7280 if (radeon_crtc->base.enabled && mode) {
7281 switch (tmp) {
7282 case 0:
7283 default:
7284 return 4096 * 2;
7285 case 1:
7286 return 1920 * 2;
7287 case 2:
7288 return 2560 * 2;
7289 }
7290 }
7291
7292 /* controller not enabled, so no lb used */
7293 return 0;
7294}
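
/*
 * Worked example (illustrative values): for a 1920-wide mode,
 * crtc_hdisplay is not < 1920 but is < 2560, so tmp = 2 above; the
 * CRTC gets LB_MEMORY_CONFIG(2) and the function returns 2560 * 2 =
 * 5120 pixels of line buffer for the watermark math that follows.
 */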
7295
7296/**
7297 * cik_get_number_of_dram_channels - get the number of dram channels
7298 *
7299 * @rdev: radeon_device pointer
7300 *
7301 * Look up the number of video ram channels (CIK).
7302 * Used for display watermark bandwidth calculations
7303 * Returns the number of dram channels
7304 */
7305static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7306{
7307 u32 tmp = RREG32(MC_SHARED_CHMAP);
7308
7309 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7310 case 0:
7311 default:
7312 return 1;
7313 case 1:
7314 return 2;
7315 case 2:
7316 return 4;
7317 case 3:
7318 return 8;
7319 case 4:
7320 return 3;
7321 case 5:
7322 return 6;
7323 case 6:
7324 return 10;
7325 case 7:
7326 return 12;
7327 case 8:
7328 return 16;
7329 }
7330}
7331
7332struct dce8_wm_params {
7333 u32 dram_channels; /* number of dram channels */
7334 u32 yclk; /* bandwidth per dram data pin in kHz */
7335 u32 sclk; /* engine clock in kHz */
7336 u32 disp_clk; /* display clock in kHz */
7337 u32 src_width; /* viewport width */
7338 u32 active_time; /* active display time in ns */
7339 u32 blank_time; /* blank time in ns */
7340 bool interlaced; /* mode is interlaced */
7341 fixed20_12 vsc; /* vertical scale ratio */
7342 u32 num_heads; /* number of active crtcs */
7343 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7344 u32 lb_size; /* line buffer allocated to pipe */
7345 u32 vtaps; /* vertical scaler taps */
7346};
7347
7348/**
7349 * dce8_dram_bandwidth - get the dram bandwidth
7350 *
7351 * @wm: watermark calculation data
7352 *
7353 * Calculate the raw dram bandwidth (CIK).
7354 * Used for display watermark bandwidth calculations
7355 * Returns the dram bandwidth in MBytes/s
7356 */
7357static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7358{
7359 /* Calculate raw DRAM Bandwidth */
7360 fixed20_12 dram_efficiency; /* 0.7 */
7361 fixed20_12 yclk, dram_channels, bandwidth;
7362 fixed20_12 a;
7363
7364 a.full = dfixed_const(1000);
7365 yclk.full = dfixed_const(wm->yclk);
7366 yclk.full = dfixed_div(yclk, a);
7367 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7368 a.full = dfixed_const(10);
7369 dram_efficiency.full = dfixed_const(7);
7370 dram_efficiency.full = dfixed_div(dram_efficiency, a);
7371 bandwidth.full = dfixed_mul(dram_channels, yclk);
7372 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7373
7374 return dfixed_trunc(bandwidth);
7375}
7376
7377/**
7378 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7379 *
7380 * @wm: watermark calculation data
7381 *
7382 * Calculate the dram bandwidth used for display (CIK).
7383 * Used for display watermark bandwidth calculations
7384 * Returns the dram bandwidth for display in MBytes/s
7385 */
7386static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7387{
7388 /* Calculate DRAM Bandwidth and the part allocated to display. */
7389 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7390 fixed20_12 yclk, dram_channels, bandwidth;
7391 fixed20_12 a;
7392
7393 a.full = dfixed_const(1000);
7394 yclk.full = dfixed_const(wm->yclk);
7395 yclk.full = dfixed_div(yclk, a);
7396 dram_channels.full = dfixed_const(wm->dram_channels * 4);
7397 a.full = dfixed_const(10);
7398 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
7399 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7400 bandwidth.full = dfixed_mul(dram_channels, yclk);
7401 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7402
7403 return dfixed_trunc(bandwidth);
7404}
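
/*
 * Worked example for the two DRAM bandwidth helpers above (assumed
 * values): with wm->yclk = 1000000 kHz (1000 MHz effective) and 4
 * channels, dram_channels = 16 bytes/clk, so the raw figure is
 * 1000 * 16 * 0.7 = 11200 MB/s, while the display-allocated figure
 * uses the 0.3 worst case instead: 1000 * 16 * 0.3 = 4800 MB/s.
 */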
7405
7406/**
7407 * dce8_data_return_bandwidth - get the data return bandwidth
7408 *
7409 * @wm: watermark calculation data
7410 *
7411 * Calculate the data return bandwidth used for display (CIK).
7412 * Used for display watermark bandwidth calculations
7413 * Returns the data return bandwidth in MBytes/s
7414 */
7415static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7416{
7417 /* Calculate the display Data return Bandwidth */
7418 fixed20_12 return_efficiency; /* 0.8 */
7419 fixed20_12 sclk, bandwidth;
7420 fixed20_12 a;
7421
7422 a.full = dfixed_const(1000);
7423 sclk.full = dfixed_const(wm->sclk);
7424 sclk.full = dfixed_div(sclk, a);
7425 a.full = dfixed_const(10);
7426 return_efficiency.full = dfixed_const(8);
7427 return_efficiency.full = dfixed_div(return_efficiency, a);
7428 a.full = dfixed_const(32);
7429 bandwidth.full = dfixed_mul(a, sclk);
7430 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7431
7432 return dfixed_trunc(bandwidth);
7433}
7434
7435/**
7436 * dce8_dmif_request_bandwidth - get the dmif bandwidth
7437 *
7438 * @wm: watermark calculation data
7439 *
7440 * Calculate the dmif bandwidth used for display (CIK).
7441 * Used for display watermark bandwidth calculations
7442 * Returns the dmif bandwidth in MBytes/s
7443 */
7444static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7445{
7446 /* Calculate the DMIF Request Bandwidth */
7447 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7448 fixed20_12 disp_clk, bandwidth;
7449 fixed20_12 a, b;
7450
7451 a.full = dfixed_const(1000);
7452 disp_clk.full = dfixed_const(wm->disp_clk);
7453 disp_clk.full = dfixed_div(disp_clk, a);
7454 a.full = dfixed_const(32);
7455 b.full = dfixed_mul(a, disp_clk);
7456
7457 a.full = dfixed_const(10);
7458 disp_clk_request_efficiency.full = dfixed_const(8);
7459 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7460
7461 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7462
7463 return dfixed_trunc(bandwidth);
7464}
7465
7466/**
7467 * dce8_available_bandwidth - get the min available bandwidth
7468 *
7469 * @wm: watermark calculation data
7470 *
7471 * Calculate the min available bandwidth used for display (CIK).
7472 * Used for display watermark bandwidth calculations
7473 * Returns the min available bandwidth in MBytes/s
7474 */
7475static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7476{
7477 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
7478 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7479 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7480 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7481
7482 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7483}
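
/*
 * Continuing the example (assumed sclk = disp_clk = 1000000 kHz):
 * the data return path gives 1000 * 32 * 0.8 = 25600 MB/s and the
 * DMIF request path the same 25600 MB/s, so the min() above would be
 * limited by the 11200 MB/s raw DRAM bandwidth.
 */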
7484
7485/**
7486 * dce8_average_bandwidth - get the average available bandwidth
7487 *
7488 * @wm: watermark calculation data
7489 *
7490 * Calculate the average available bandwidth used for display (CIK).
7491 * Used for display watermark bandwidth calculations
7492 * Returns the average available bandwidth in MBytes/s
7493 */
7494static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7495{
7496 /* Calculate the display mode Average Bandwidth
7497 * DisplayMode should contain the source and destination dimensions,
7498 * timing, etc.
7499 */
7500 fixed20_12 bpp;
7501 fixed20_12 line_time;
7502 fixed20_12 src_width;
7503 fixed20_12 bandwidth;
7504 fixed20_12 a;
7505
7506 a.full = dfixed_const(1000);
7507 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7508 line_time.full = dfixed_div(line_time, a);
7509 bpp.full = dfixed_const(wm->bytes_per_pixel);
7510 src_width.full = dfixed_const(wm->src_width);
7511 bandwidth.full = dfixed_mul(src_width, bpp);
7512 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7513 bandwidth.full = dfixed_div(bandwidth, line_time);
7514
7515 return dfixed_trunc(bandwidth);
7516}
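
/*
 * Worked example (assumed values): a 1920-wide source at 4 bytes per
 * pixel with vsc = 1.0 and a 13200 ns line time (active + blank)
 * averages 1920 * 4 / 13.2 us ~= 582 MB/s for this head.
 */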
7517
7518/**
7519 * dce8_latency_watermark - get the latency watermark
7520 *
7521 * @wm: watermark calculation data
7522 *
7523 * Calculate the latency watermark (CIK).
7524 * Used for display watermark bandwidth calculations
7525 * Returns the latency watermark in ns
7526 */
7527static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7528{
7529 /* First calculate the latency in ns */
7530 u32 mc_latency = 2000; /* 2000 ns. */
7531 u32 available_bandwidth = dce8_available_bandwidth(wm);
7532 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7533 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7534 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7535 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7536 (wm->num_heads * cursor_line_pair_return_time);
7537 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7538 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7539 u32 tmp, dmif_size = 12288;
7540 fixed20_12 a, b, c;
7541
7542 if (wm->num_heads == 0)
7543 return 0;
7544
7545 a.full = dfixed_const(2);
7546 b.full = dfixed_const(1);
7547 if ((wm->vsc.full > a.full) ||
7548 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7549 (wm->vtaps >= 5) ||
7550 ((wm->vsc.full >= a.full) && wm->interlaced))
7551 max_src_lines_per_dst_line = 4;
7552 else
7553 max_src_lines_per_dst_line = 2;
7554
7555 a.full = dfixed_const(available_bandwidth);
7556 b.full = dfixed_const(wm->num_heads);
7557 a.full = dfixed_div(a, b);
7558
7559 b.full = dfixed_const(mc_latency + 512);
7560 c.full = dfixed_const(wm->disp_clk);
7561 b.full = dfixed_div(b, c);
7562
7563 c.full = dfixed_const(dmif_size);
7564 b.full = dfixed_div(c, b);
7565
7566 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7567
7568 b.full = dfixed_const(1000);
7569 c.full = dfixed_const(wm->disp_clk);
7570 b.full = dfixed_div(c, b);
7571 c.full = dfixed_const(wm->bytes_per_pixel);
7572 b.full = dfixed_mul(b, c);
7573
7574 lb_fill_bw = min(tmp, dfixed_trunc(b));
7575
7576 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7577 b.full = dfixed_const(1000);
7578 c.full = dfixed_const(lb_fill_bw);
7579 b.full = dfixed_div(c, b);
7580 a.full = dfixed_div(a, b);
7581 line_fill_time = dfixed_trunc(a);
7582
7583 if (line_fill_time < wm->active_time)
7584 return latency;
7585 else
7586 return latency + (line_fill_time - wm->active_time);
7587
7588}
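
/*
 * Restating the math above: the watermark is the 2000 ns MC latency,
 * plus the dc pipe latency (40000000 / disp_clk ns), plus the time
 * the other heads can hold the return path; it then grows by any
 * shortfall of the line fill time against the active time.
 */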
7589
7590/**
7591 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7592 * average and available dram bandwidth
7593 *
7594 * @wm: watermark calculation data
7595 *
7596 * Check if the display average bandwidth fits in the display
7597 * dram bandwidth (CIK).
7598 * Used for display watermark bandwidth calculations
7599 * Returns true if the display fits, false if not.
7600 */
7601static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7602{
7603 if (dce8_average_bandwidth(wm) <=
7604 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7605 return true;
7606 else
7607 return false;
7608}
7609
7610/**
7611 * dce8_average_bandwidth_vs_available_bandwidth - check
7612 * average and available bandwidth
7613 *
7614 * @wm: watermark calculation data
7615 *
7616 * Check if the display average bandwidth fits in the display
7617 * available bandwidth (CIK).
7618 * Used for display watermark bandwidth calculations
7619 * Returns true if the display fits, false if not.
7620 */
7621static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7622{
7623 if (dce8_average_bandwidth(wm) <=
7624 (dce8_available_bandwidth(wm) / wm->num_heads))
7625 return true;
7626 else
7627 return false;
7628}
7629
7630/**
7631 * dce8_check_latency_hiding - check latency hiding
7632 *
7633 * @wm: watermark calculation data
7634 *
7635 * Check latency hiding (CIK).
7636 * Used for display watermark bandwidth calculations
7637 * Returns true if the display fits, false if not.
7638 */
7639static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7640{
7641 u32 lb_partitions = wm->lb_size / wm->src_width;
7642 u32 line_time = wm->active_time + wm->blank_time;
7643 u32 latency_tolerant_lines;
7644 u32 latency_hiding;
7645 fixed20_12 a;
7646
7647 a.full = dfixed_const(1);
7648 if (wm->vsc.full > a.full)
7649 latency_tolerant_lines = 1;
7650 else {
7651 if (lb_partitions <= (wm->vtaps + 1))
7652 latency_tolerant_lines = 1;
7653 else
7654 latency_tolerant_lines = 2;
7655 }
7656
7657 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7658
7659 if (dce8_latency_watermark(wm) <= latency_hiding)
7660 return true;
7661 else
7662 return false;
7663}
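
/*
 * i.e. the latency watermark must fit within latency_tolerant_lines *
 * line_time + blank_time, where only one line of tolerance is assumed
 * when the source is scaled down (vsc > 1) or the line buffer holds
 * no more partitions than vtaps + 1.
 */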
7664
7665/**
7666 * dce8_program_watermarks - program display watermarks
7667 *
7668 * @rdev: radeon_device pointer
7669 * @radeon_crtc: the selected display controller
7670 * @lb_size: line buffer size
7671 * @num_heads: number of display controllers in use
7672 *
7673 * Calculate and program the display watermarks for the
7674 * selected display controller (CIK).
7675 */
7676static void dce8_program_watermarks(struct radeon_device *rdev,
7677 struct radeon_crtc *radeon_crtc,
7678 u32 lb_size, u32 num_heads)
7679{
7680 struct drm_display_mode *mode = &radeon_crtc->base.mode;
Alex Deucher58ea2de2013-01-24 10:03:39 -05007681 struct dce8_wm_params wm_low, wm_high;
Alex Deuchercd84a272012-07-20 17:13:13 -04007682 u32 pixel_period;
7683 u32 line_time = 0;
7684 u32 latency_watermark_a = 0, latency_watermark_b = 0;
7685 u32 tmp, wm_mask;
7686
7687 if (radeon_crtc->base.enabled && num_heads && mode) {
7688 pixel_period = 1000000 / (u32)mode->clock;
7689 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7690
Alex Deucher58ea2de2013-01-24 10:03:39 -05007691 /* watermark for high clocks */
7692 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7693 rdev->pm.dpm_enabled) {
7694 wm_high.yclk =
7695 radeon_dpm_get_mclk(rdev, false) * 10;
7696 wm_high.sclk =
7697 radeon_dpm_get_sclk(rdev, false) * 10;
7698 } else {
7699 wm_high.yclk = rdev->pm.current_mclk * 10;
7700 wm_high.sclk = rdev->pm.current_sclk * 10;
7701 }
7702
7703 wm_high.disp_clk = mode->clock;
7704 wm_high.src_width = mode->crtc_hdisplay;
7705 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7706 wm_high.blank_time = line_time - wm_high.active_time;
7707 wm_high.interlaced = false;
Alex Deuchercd84a272012-07-20 17:13:13 -04007708 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
Alex Deucher58ea2de2013-01-24 10:03:39 -05007709 wm_high.interlaced = true;
7710 wm_high.vsc = radeon_crtc->vsc;
7711 wm_high.vtaps = 1;
Alex Deuchercd84a272012-07-20 17:13:13 -04007712 if (radeon_crtc->rmx_type != RMX_OFF)
Alex Deucher58ea2de2013-01-24 10:03:39 -05007713 wm_high.vtaps = 2;
7714 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7715 wm_high.lb_size = lb_size;
7716 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7717 wm_high.num_heads = num_heads;
Alex Deuchercd84a272012-07-20 17:13:13 -04007718
7719 /* set for high clocks */
Alex Deucher58ea2de2013-01-24 10:03:39 -05007720 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
Alex Deuchercd84a272012-07-20 17:13:13 -04007721
7722 /* possibly force display priority to high */
7723 /* should really do this at mode validation time... */
Alex Deucher58ea2de2013-01-24 10:03:39 -05007724 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7725 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7726 !dce8_check_latency_hiding(&wm_high) ||
7727 (rdev->disp_priority == 2)) {
7728 DRM_DEBUG_KMS("force priority to high\n");
7729 }
7730
7731 /* watermark for low clocks */
7732 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7733 rdev->pm.dpm_enabled) {
7734 wm_low.yclk =
7735 radeon_dpm_get_mclk(rdev, true) * 10;
7736 wm_low.sclk =
7737 radeon_dpm_get_sclk(rdev, true) * 10;
7738 } else {
7739 wm_low.yclk = rdev->pm.current_mclk * 10;
7740 wm_low.sclk = rdev->pm.current_sclk * 10;
7741 }
7742
7743 wm_low.disp_clk = mode->clock;
7744 wm_low.src_width = mode->crtc_hdisplay;
7745 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7746 wm_low.blank_time = line_time - wm_low.active_time;
7747 wm_low.interlaced = false;
7748 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7749 wm_low.interlaced = true;
7750 wm_low.vsc = radeon_crtc->vsc;
7751 wm_low.vtaps = 1;
7752 if (radeon_crtc->rmx_type != RMX_OFF)
7753 wm_low.vtaps = 2;
7754 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7755 wm_low.lb_size = lb_size;
7756 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7757 wm_low.num_heads = num_heads;
7758
7759 /* set for low clocks */
7760 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7761
7762 /* possibly force display priority to high */
7763 /* should really do this at mode validation time... */
7764 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7765 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7766 !dce8_check_latency_hiding(&wm_low) ||
Alex Deuchercd84a272012-07-20 17:13:13 -04007767 (rdev->disp_priority == 2)) {
7768 DRM_DEBUG_KMS("force priority to high\n");
7769 }
7770 }
7771
7772 /* select wm A */
7773 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7774 tmp = wm_mask;
7775 tmp &= ~LATENCY_WATERMARK_MASK(3);
7776 tmp |= LATENCY_WATERMARK_MASK(1);
7777 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7778 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7779 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7780 LATENCY_HIGH_WATERMARK(line_time)));
7781 /* select wm B */
7782 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7783 tmp &= ~LATENCY_WATERMARK_MASK(3);
7784 tmp |= LATENCY_WATERMARK_MASK(2);
7785 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7786 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7787 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7788 LATENCY_HIGH_WATERMARK(line_time)));
7789 /* restore original selection */
7790 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
Alex Deucher58ea2de2013-01-24 10:03:39 -05007791
7792 /* save values for DPM */
7793 radeon_crtc->line_time = line_time;
7794 radeon_crtc->wm_high = latency_watermark_a;
7795 radeon_crtc->wm_low = latency_watermark_b;
Alex Deuchercd84a272012-07-20 17:13:13 -04007796}
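
/*
 * Design note: watermark A is computed against the high (DPM
 * performance) clocks and watermark B against the low ones; each is
 * written through DPG_PIPE_LATENCY_CONTROL while the matching
 * LATENCY_WATERMARK_MASK is selected, and the saved wm_high/wm_low
 * values are presumably what lets the DPM code switch sets later.
 */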
7797
7798/**
7799 * dce8_bandwidth_update - program display watermarks
7800 *
7801 * @rdev: radeon_device pointer
7802 *
7803 * Calculate and program the display watermarks and line
7804 * buffer allocation (CIK).
7805 */
7806void dce8_bandwidth_update(struct radeon_device *rdev)
7807{
7808 struct drm_display_mode *mode = NULL;
7809 u32 num_heads = 0, lb_size;
7810 int i;
7811
7812 radeon_update_display_priority(rdev);
7813
7814 for (i = 0; i < rdev->num_crtc; i++) {
7815 if (rdev->mode_info.crtcs[i]->base.enabled)
7816 num_heads++;
7817 }
7818 for (i = 0; i < rdev->num_crtc; i++) {
7819 mode = &rdev->mode_info.crtcs[i]->base.mode;
7820 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
7821 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
7822 }
7823}
Alex Deucher44fa3462012-12-18 22:17:00 -05007824
7825/**
7826 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
7827 *
7828 * @rdev: radeon_device pointer
7829 *
7830 * Fetches a GPU clock counter snapshot (CIK).
7831 * Returns the 64 bit clock counter snapshot.
7832 */
7833uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
7834{
7835 uint64_t clock;
7836
7837 mutex_lock(&rdev->gpu_clock_mutex);
7838 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7839 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7840 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7841 mutex_unlock(&rdev->gpu_clock_mutex);
7842 return clock;
7843}
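
/*
 * Usage note: writing 1 to RLC_CAPTURE_GPU_CLOCK_COUNT latches the
 * 64-bit counter so the LSB/MSB halves read back as a consistent
 * pair; gpu_clock_mutex keeps concurrent callers from clobbering
 * each other's capture.
 */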
7844
Christian König87167bb2013-04-09 13:39:21 -04007845static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
7846 u32 cntl_reg, u32 status_reg)
7847{
7848 int r, i;
7849 struct atom_clock_dividers dividers;
7850 uint32_t tmp;
7851
7852 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
7853 clock, false, &dividers);
7854 if (r)
7855 return r;
7856
7857 tmp = RREG32_SMC(cntl_reg);
7858 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
7859 tmp |= dividers.post_divider;
7860 WREG32_SMC(cntl_reg, tmp);
7861
7862 for (i = 0; i < 100; i++) {
7863 if (RREG32_SMC(status_reg) & DCLK_STATUS)
7864 break;
7865 mdelay(10);
7866 }
7867 if (i == 100)
7868 return -ETIMEDOUT;
7869
7870 return 0;
7871}
7872
7873int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7874{
7875 int r = 0;
7876
7877 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
7878 if (r)
7879 return r;
7880
7881 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
7882 return r;
7883}
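
/*
 * Note: VCLK and DCLK are programmed independently through the same
 * helper: each call asks ATOM for dividers, writes the post divider
 * into cntl_reg and then polls status_reg for DCLK_STATUS for up to
 * 100 * 10 ms = 1 s before giving up with -ETIMEDOUT.
 */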
7884
Alex Deucher8a7cd272013-08-06 11:29:39 -04007885static void cik_pcie_gen3_enable(struct radeon_device *rdev)
7886{
7887 struct pci_dev *root = rdev->pdev->bus->self;
7888 int bridge_pos, gpu_pos;
7889 u32 speed_cntl, mask, current_data_rate;
7890 int ret, i;
7891 u16 tmp16;
7892
7893 if (radeon_pcie_gen2 == 0)
7894 return;
7895
7896 if (rdev->flags & RADEON_IS_IGP)
7897 return;
7898
7899 if (!(rdev->flags & RADEON_IS_PCIE))
7900 return;
7901
7902 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7903 if (ret != 0)
7904 return;
7905
7906 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7907 return;
7908
7909 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7910 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7911 LC_CURRENT_DATA_RATE_SHIFT;
7912 if (mask & DRM_PCIE_SPEED_80) {
7913 if (current_data_rate == 2) {
7914 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7915 return;
7916 }
7917 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7918 } else if (mask & DRM_PCIE_SPEED_50) {
7919 if (current_data_rate == 1) {
7920 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7921 return;
7922 }
7923 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7924 }
7925
7926 bridge_pos = pci_pcie_cap(root);
7927 if (!bridge_pos)
7928 return;
7929
7930 gpu_pos = pci_pcie_cap(rdev->pdev);
7931 if (!gpu_pos)
7932 return;
7933
7934 if (mask & DRM_PCIE_SPEED_80) {
7935 /* re-try equalization if gen3 is not already enabled */
7936 if (current_data_rate != 2) {
7937 u16 bridge_cfg, gpu_cfg;
7938 u16 bridge_cfg2, gpu_cfg2;
7939 u32 max_lw, current_lw, tmp;
7940
7941 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7942 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7943
7944 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7945 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7946
7947 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7948 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7949
7950 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
7951 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7952 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7953
7954 if (current_lw < max_lw) {
7955 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7956 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7957 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7958 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7959 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7960 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7961 }
7962 }
7963
7964 for (i = 0; i < 10; i++) {
7965 /* check status */
7966 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7967 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7968 break;
7969
7970 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7971 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7972
7973 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7974 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7975
7976 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7977 tmp |= LC_SET_QUIESCE;
7978 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7979
7980 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7981 tmp |= LC_REDO_EQ;
7982 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7983
7984 mdelay(100);
7985
7986 /* linkctl */
7987 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7988 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7989 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7990 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7991
7992 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7993 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7994 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7995 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7996
7997 /* linkctl2 */
7998 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7999 tmp16 &= ~((1 << 4) | (7 << 9));
8000 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8001 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8002
8003 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8004 tmp16 &= ~((1 << 4) | (7 << 9));
8005 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8006 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8007
8008 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8009 tmp &= ~LC_SET_QUIESCE;
8010 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8011 }
8012 }
8013 }
8014
8015 /* set the link speed */
8016 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8017 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8018 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8019
8020 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8021 tmp16 &= ~0xf;
8022 if (mask & DRM_PCIE_SPEED_80)
8023 tmp16 |= 3; /* gen3 */
8024 else if (mask & DRM_PCIE_SPEED_50)
8025 tmp16 |= 2; /* gen2 */
8026 else
8027 tmp16 |= 1; /* gen1 */
8028 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8029
8030 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8031 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8032 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8033
8034 for (i = 0; i < rdev->usec_timeout; i++) {
8035 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8036 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8037 break;
8038 udelay(1);
8039 }
8040}
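
/*
 * In outline, the gen3 path above saves the bridge and GPU
 * LNKCTL/LNKCTL2 state, retries link equalization up to 10 times
 * under LC_SET_QUIESCE/LC_REDO_EQ, then writes the target speed
 * (1 = gen1, 2 = gen2, 3 = gen3) into LNKCTL2 and kicks a retrain
 * with LC_INITIATE_LINK_SPEED_CHANGE, polling until it completes.
 */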
Alex Deucher7235711a42013-04-04 13:58:09 -04008041
8042static void cik_program_aspm(struct radeon_device *rdev)
8043{
8044 u32 data, orig;
8045 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8046 bool disable_clkreq = false;
8047
8048 if (radeon_aspm == 0)
8049 return;
8050
8051 /* XXX double check IGPs */
8052 if (rdev->flags & RADEON_IS_IGP)
8053 return;
8054
8055 if (!(rdev->flags & RADEON_IS_PCIE))
8056 return;
8057
8058 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8059 data &= ~LC_XMIT_N_FTS_MASK;
8060 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8061 if (orig != data)
8062 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8063
8064 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8065 data |= LC_GO_TO_RECOVERY;
8066 if (orig != data)
8067 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8068
8069 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8070 data |= P_IGNORE_EDB_ERR;
8071 if (orig != data)
8072 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8073
8074 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8075 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8076 data |= LC_PMI_TO_L1_DIS;
8077 if (!disable_l0s)
8078 data |= LC_L0S_INACTIVITY(7);
8079
8080 if (!disable_l1) {
8081 data |= LC_L1_INACTIVITY(7);
8082 data &= ~LC_PMI_TO_L1_DIS;
8083 if (orig != data)
8084 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8085
8086 if (!disable_plloff_in_l1) {
8087 bool clk_req_support;
8088
8089 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8090 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8091 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8092 if (orig != data)
8093 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8094
8095 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8096 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8097 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8098 if (orig != data)
8099 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8100
8101 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8102 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8103 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8104 if (orig != data)
8105 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8106
8107 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8108 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8109 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8110 if (orig != data)
8111 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8112
8113 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8114 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8115 data |= LC_DYN_LANES_PWR_STATE(3);
8116 if (orig != data)
8117 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8118
8119 if (!disable_clkreq) {
8120 struct pci_dev *root = rdev->pdev->bus->self;
8121 u32 lnkcap;
8122
8123 clk_req_support = false;
8124 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8125 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8126 clk_req_support = true;
8127 } else {
8128 clk_req_support = false;
8129 }
8130
8131 if (clk_req_support) {
8132 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8133 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8134 if (orig != data)
8135 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8136
8137 orig = data = RREG32_SMC(THM_CLK_CNTL);
8138 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8139 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8140 if (orig != data)
8141 WREG32_SMC(THM_CLK_CNTL, data);
8142
8143 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8144 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8145 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8146 if (orig != data)
8147 WREG32_SMC(MISC_CLK_CTRL, data);
8148
8149 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8150 data &= ~BCLK_AS_XCLK;
8151 if (orig != data)
8152 WREG32_SMC(CG_CLKPIN_CNTL, data);
8153
8154 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8155 data &= ~FORCE_BIF_REFCLK_EN;
8156 if (orig != data)
8157 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8158
8159 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8160 data &= ~MPLL_CLKOUT_SEL_MASK;
8161 data |= MPLL_CLKOUT_SEL(4);
8162 if (orig != data)
8163 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8164 }
8165 }
8166 } else {
8167 if (orig != data)
8168 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8169 }
8170
8171 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8172 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8173 if (orig != data)
8174 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8175
8176 if (!disable_l0s) {
8177 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8178 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8179 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8180 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8181 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8182 data &= ~LC_L0S_INACTIVITY_MASK;
8183 if (orig != data)
8184 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8185 }
8186 }
8187 }
8188}
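
/*
 * Note: disable_l0s/disable_l1/disable_plloff_in_l1/disable_clkreq
 * are all hardcoded false above, so with radeon.aspm enabled a dGPU
 * always takes the full path: L0s/L1 inactivity timers, PIF PLL
 * powerdown in L1, and CLKREQ-based clocking whenever the root port
 * advertises PCI_EXP_LNKCAP_CLKPM.
 */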