/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

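/*
 * Firmware images this driver may request at runtime.  MODULE_FIRMWARE()
 * records each name in the module metadata so that userspace tooling
 * (e.g. initramfs generators) knows to bundle the corresponding blobs.
 */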
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

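/*
 * Worked example for the conversion above: a raw CTF_TEMP field of
 * 0x0C8 (200) has bit 0x200 clear, so ci_get_temp() reports
 * 200 * 1000 = 200000 millidegrees C; any reading with bit 0x200 set
 * is clamped to the 255 C maximum.
 */
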
/*
 * Indirect registers accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX); /* read back to post the index write */
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX); /* read back to post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA); /* post the data write as well */
}

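/*
 * RLC save/restore register lists.  Reading the tables off, each entry
 * appears to encode (instance select << 16) | (register byte offset >> 2),
 * followed by a 0x00000000 slot for the saved value; the bare literals
 * (0x3, 0x5) delimit sub-lists whose exact meaning is defined by the RLC
 * ucode.  This layout is an assumption inferred from the tables
 * themselves, not a documented format.
 */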
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

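/*
 * "Golden" register tables: {offset, and_mask, or_mask} triplets applied
 * by radeon_program_register_sequence() -- the register is read, the
 * and_mask bits are cleared, the or_mask bits are set, and the result is
 * written back (an and_mask of 0xffffffff writes or_mask directly).
 */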
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

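/*
 * For reference, a minimal sketch of the helper used above (the real
 * implementation lives in radeon_device.c; this is illustrative only):
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg      = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask  = registers[i + 2];
 *		if (and_mask == 0xffffffff)
 *			tmp = or_mask;
 *		else
 *			tmp = (RREG32(reg) & ~and_mask) | or_mask;
 *		WREG32(reg, tmp);
 *	}
 */
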
1334
Alex Deucher2c679122013-04-09 13:32:18 -04001335/**
1336 * cik_get_xclk - get the xclk
1337 *
1338 * @rdev: radeon_device pointer
1339 *
1340 * Returns the reference clock used by the gfx engine
1341 * (CIK).
1342 */
1343u32 cik_get_xclk(struct radeon_device *rdev)
1344{
1345 u32 reference_clock = rdev->clock.spll.reference_freq;
1346
1347 if (rdev->flags & RADEON_IS_IGP) {
1348 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1349 return reference_clock / 2;
1350 } else {
1351 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1352 return reference_clock / 4;
1353 }
1354 return reference_clock;
1355}
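
/*
 * Example of the divisors above: given a 100 MHz reference clock, an IGP
 * with GPU_COUNTER_CLK set reports 50 MHz, and a dGPU with XTALIN_DIVIDE
 * set reports 25 MHz.
 */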
1356
Alex Deucher75efdee2013-03-04 12:47:46 -05001357/**
1358 * cik_mm_rdoorbell - read a doorbell dword
1359 *
1360 * @rdev: radeon_device pointer
1361 * @offset: byte offset into the aperture
1362 *
1363 * Returns the value in the doorbell aperture at the
1364 * requested offset (CIK).
1365 */
1366u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1367{
1368 if (offset < rdev->doorbell.size) {
1369 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1370 } else {
1371 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1372 return 0;
1373 }
1374}
1375
1376/**
1377 * cik_mm_wdoorbell - write a doorbell dword
1378 *
1379 * @rdev: radeon_device pointer
1380 * @offset: byte offset into the aperture
1381 * @v: value to write
1382 *
1383 * Writes @v to the doorbell aperture at the
1384 * requested offset (CIK).
1385 */
1386void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1387{
1388 if (offset < rdev->doorbell.size) {
1389 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1390 } else {
1391 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1392 }
1393}
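
/*
 * A typical caller kicks a ring through the RDOORBELL32()/WDOORBELL32()
 * wrappers in radeon.h, along the lines of (field name hypothetical):
 *
 *	WDOORBELL32(ring->doorbell_offset, ring->wptr);
 */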

#define BONAIRE_IO_MC_REGS_SIZE 36

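/*
 * MC sequencer I/O debug settings: {MC_SEQ_IO_DEBUG index, data} pairs,
 * streamed into MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode()
 * below before the MC ucode itself is loaded.
 */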
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
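
/*
 * Typical usage brackets accesses to instanced registers and restores
 * the default instance afterwards:
 *
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program per-instance registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *
 * Callers are expected to serialize this sequence themselves.
 */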

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			/* XXX: dead code as written -- 'running' is known to
			 * be zero inside this branch, so this blackout save
			 * (and the matching restore below) never executes.
			 */
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
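
/*
 * ci_mc_load_microcode() is invoked from the hw init path (assumed:
 * cik_startup()) after cik_init_microcode() below has fetched
 * rdev->mc_fw, and only on dGPUs -- APUs carry no MC ucode.
 */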

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);	/* was leaked on error */
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);	/* was leaked on error */
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

Alex Deucher8cc1a532013-04-09 12:41:24 -04001689/*
1690 * Core functions
1691 */
1692/**
1693 * cik_tiling_mode_table_init - init the hw tiling table
1694 *
1695 * @rdev: radeon_device pointer
1696 *
1697 * Starting with SI, the tiling setup is done globally in a
1698 * set of 32 tiling modes. Rather than selecting each set of
1699 * parameters per surface as on older asics, we just select
1700 * which index in the tiling table we want to use, and the
1701 * surface uses those parameters (CIK).
1702 */
1703static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1704{
1705 const u32 num_tile_mode_states = 32;
1706 const u32 num_secondary_tile_mode_states = 16;
1707 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1708 u32 num_pipe_configs;
1709 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1710 rdev->config.cik.max_shader_engines;
1711
1712 switch (rdev->config.cik.mem_row_size_in_kb) {
1713 case 1:
1714 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1715 break;
1716 case 2:
1717 default:
1718 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1719 break;
1720 case 4:
1721 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1722 break;
1723 }
1724
1725 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1726 if (num_pipe_configs > 8)
1727 num_pipe_configs = 8; /* ??? */
1728
1729 if (num_pipe_configs == 8) {
1730 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1731 switch (reg_offset) {
1732 case 0:
1733 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1734 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1735 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1736 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1737 break;
1738 case 1:
1739 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1740 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1741 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1742 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1743 break;
1744 case 2:
1745 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1746 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1747 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1748 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1749 break;
1750 case 3:
1751 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1752 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1753 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1754 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1755 break;
1756 case 4:
1757 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1758 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1759 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1760 TILE_SPLIT(split_equal_to_row_size));
1761 break;
1762 case 5:
1763 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1764 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1765 break;
1766 case 6:
1767 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1768 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1769 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1770 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1771 break;
1772 case 7:
1773 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1774 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1775 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1776 TILE_SPLIT(split_equal_to_row_size));
1777 break;
1778 case 8:
1779 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1780 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1781 break;
1782 case 9:
1783 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1784 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1785 break;
1786 case 10:
1787 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1788 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1789 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1790 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1791 break;
1792 case 11:
1793 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1794 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1795 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1796 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1797 break;
1798 case 12:
1799 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1800 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1801 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1802 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1803 break;
1804 case 13:
1805 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1806 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1807 break;
1808 case 14:
1809 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1810 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1811 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1812 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1813 break;
1814 case 16:
1815 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1816 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1817 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1818 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1819 break;
1820 case 17:
1821 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1822 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1823 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1824 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1825 break;
1826 case 27:
1827 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1828 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1829 break;
1830 case 28:
1831 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1832 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1833 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1834 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1835 break;
1836 case 29:
1837 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1838 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1839 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1841 break;
1842 case 30:
1843 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1844 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1845 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1847 break;
1848 default:
1849 gb_tile_moden = 0;
1850 break;
1851 }
1852		rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1853		WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1854 }
1855 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1856 switch (reg_offset) {
1857 case 0:
1858 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1859 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1860 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1861 NUM_BANKS(ADDR_SURF_16_BANK));
1862 break;
1863 case 1:
1864 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1865 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1866 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1867 NUM_BANKS(ADDR_SURF_16_BANK));
1868 break;
1869 case 2:
1870 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1871 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1872 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1873 NUM_BANKS(ADDR_SURF_16_BANK));
1874 break;
1875 case 3:
1876 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1877 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1878 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1879 NUM_BANKS(ADDR_SURF_16_BANK));
1880 break;
1881 case 4:
1882 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1883 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1884 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1885 NUM_BANKS(ADDR_SURF_8_BANK));
1886 break;
1887 case 5:
1888 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1889 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1890 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1891 NUM_BANKS(ADDR_SURF_4_BANK));
1892 break;
1893 case 6:
1894 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1896 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1897 NUM_BANKS(ADDR_SURF_2_BANK));
1898 break;
1899 case 8:
1900 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1901 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1902 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1903 NUM_BANKS(ADDR_SURF_16_BANK));
1904 break;
1905 case 9:
1906 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1907 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1908 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1909 NUM_BANKS(ADDR_SURF_16_BANK));
1910 break;
1911 case 10:
1912 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1913 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1914 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1915 NUM_BANKS(ADDR_SURF_16_BANK));
1916 break;
1917 case 11:
1918 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1919 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1920 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1921 NUM_BANKS(ADDR_SURF_16_BANK));
1922 break;
1923 case 12:
1924 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1926 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1927 NUM_BANKS(ADDR_SURF_8_BANK));
1928 break;
1929 case 13:
1930 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1931 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1932 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1933 NUM_BANKS(ADDR_SURF_4_BANK));
1934 break;
1935 case 14:
1936 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1937 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1938 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1939 NUM_BANKS(ADDR_SURF_2_BANK));
1940 break;
1941 default:
1942 gb_tile_moden = 0;
1943 break;
1944 }
1945 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1946 }
1947 } else if (num_pipe_configs == 4) {
1948 if (num_rbs == 4) {
1949 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1950 switch (reg_offset) {
1951 case 0:
1952 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1953 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1954 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1955 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1956 break;
1957 case 1:
1958 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1959 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1960 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1961 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1962 break;
1963 case 2:
1964 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1965 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1966 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1967 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1968 break;
1969 case 3:
1970 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1971 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1972 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1973 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1974 break;
1975 case 4:
1976 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1977 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1978 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1979 TILE_SPLIT(split_equal_to_row_size));
1980 break;
1981 case 5:
1982 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1983 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1984 break;
1985 case 6:
1986 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1987 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1988 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1989 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1990 break;
1991 case 7:
1992 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1993 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1994 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1995 TILE_SPLIT(split_equal_to_row_size));
1996 break;
1997 case 8:
1998 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1999 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2000 break;
2001 case 9:
2002 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2003 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2004 break;
2005 case 10:
2006 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2008 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2009 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2010 break;
2011 case 11:
2012 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2013 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2014 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2016 break;
2017 case 12:
2018 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2019 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2020 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2021 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2022 break;
2023 case 13:
2024 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2025 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2026 break;
2027 case 14:
2028 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2029 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2030 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2031 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2032 break;
2033 case 16:
2034 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2035 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2036 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2037 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2038 break;
2039 case 17:
2040 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2042 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2043 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2044 break;
2045 case 27:
2046 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2047 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2048 break;
2049 case 28:
2050 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2051 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2052 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2053 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2054 break;
2055 case 29:
2056 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2057 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2058 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2060 break;
2061 case 30:
2062 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2063 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2064 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2065 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2066 break;
2067 default:
2068 gb_tile_moden = 0;
2069 break;
2070 }
2071				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2072				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2073 }
2074 } else if (num_rbs < 4) {
2075 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2076 switch (reg_offset) {
2077 case 0:
2078 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2079 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2080 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2081 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2082 break;
2083 case 1:
2084 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2085 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2086 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2087 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2088 break;
2089 case 2:
2090 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2092 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2093 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2094 break;
2095 case 3:
2096 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2097 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2098 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2099 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2100 break;
2101 case 4:
2102 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2103 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2104 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2105 TILE_SPLIT(split_equal_to_row_size));
2106 break;
2107 case 5:
2108 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2109 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110 break;
2111 case 6:
2112 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2113 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2114 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2115 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2116 break;
2117 case 7:
2118 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2119 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2120 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2121 TILE_SPLIT(split_equal_to_row_size));
2122 break;
2123 case 8:
2124 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2125 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2126 break;
2127 case 9:
2128 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2129 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2130 break;
2131 case 10:
2132 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2133 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2134 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2136 break;
2137 case 11:
2138 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2139 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2140 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2142 break;
2143 case 12:
2144 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2145 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2146 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2147 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148 break;
2149 case 13:
2150 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2151 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2152 break;
2153 case 14:
2154 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2156 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2157 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2158 break;
2159 case 16:
2160 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2161 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2162 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2163 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164 break;
2165 case 17:
2166 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2167 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2168 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170 break;
2171 case 27:
2172 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2174 break;
2175 case 28:
2176 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2177 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2178 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180 break;
2181 case 29:
2182 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2183 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2184 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186 break;
2187 case 30:
2188 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2189 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2190 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2191 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2192 break;
2193 default:
2194 gb_tile_moden = 0;
2195 break;
2196 }
2197				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2198				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2199 }
2200 }
2201 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2202 switch (reg_offset) {
2203 case 0:
2204 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2207 NUM_BANKS(ADDR_SURF_16_BANK));
2208 break;
2209 case 1:
2210 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2213 NUM_BANKS(ADDR_SURF_16_BANK));
2214 break;
2215 case 2:
2216 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2217 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2218 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2219 NUM_BANKS(ADDR_SURF_16_BANK));
2220 break;
2221 case 3:
2222 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2223 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2224 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2225 NUM_BANKS(ADDR_SURF_16_BANK));
2226 break;
2227 case 4:
2228 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2229 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2230 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2231 NUM_BANKS(ADDR_SURF_16_BANK));
2232 break;
2233 case 5:
2234 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2236 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2237 NUM_BANKS(ADDR_SURF_8_BANK));
2238 break;
2239 case 6:
2240 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2243 NUM_BANKS(ADDR_SURF_4_BANK));
2244 break;
2245 case 8:
2246 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2249 NUM_BANKS(ADDR_SURF_16_BANK));
2250 break;
2251 case 9:
2252 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2255 NUM_BANKS(ADDR_SURF_16_BANK));
2256 break;
2257 case 10:
2258 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261 NUM_BANKS(ADDR_SURF_16_BANK));
2262 break;
2263 case 11:
2264 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2267 NUM_BANKS(ADDR_SURF_16_BANK));
2268 break;
2269 case 12:
2270 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273 NUM_BANKS(ADDR_SURF_16_BANK));
2274 break;
2275 case 13:
2276 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2279 NUM_BANKS(ADDR_SURF_8_BANK));
2280 break;
2281 case 14:
2282 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2285 NUM_BANKS(ADDR_SURF_4_BANK));
2286 break;
2287 default:
2288 gb_tile_moden = 0;
2289 break;
2290 }
2291 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2292 }
2293 } else if (num_pipe_configs == 2) {
2294 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2295 switch (reg_offset) {
2296 case 0:
2297 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2301 break;
2302 case 1:
2303 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2305 PIPE_CONFIG(ADDR_SURF_P2) |
2306 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2307 break;
2308 case 2:
2309 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2313 break;
2314 case 3:
2315 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2317 PIPE_CONFIG(ADDR_SURF_P2) |
2318 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2319 break;
2320 case 4:
2321 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 TILE_SPLIT(split_equal_to_row_size));
2325 break;
2326 case 5:
2327 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329 break;
2330 case 6:
2331 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2333 PIPE_CONFIG(ADDR_SURF_P2) |
2334 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2335 break;
2336 case 7:
2337 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2338 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2339 PIPE_CONFIG(ADDR_SURF_P2) |
2340 TILE_SPLIT(split_equal_to_row_size));
2341 break;
2342 case 8:
2343 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2344 break;
2345 case 9:
2346 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2348 break;
2349 case 10:
2350 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352 PIPE_CONFIG(ADDR_SURF_P2) |
2353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2354 break;
2355 case 11:
2356 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2357 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358 PIPE_CONFIG(ADDR_SURF_P2) |
2359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360 break;
2361 case 12:
2362 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2363 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 PIPE_CONFIG(ADDR_SURF_P2) |
2365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2366 break;
2367 case 13:
2368 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2369 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2370 break;
2371 case 14:
2372 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374 PIPE_CONFIG(ADDR_SURF_P2) |
2375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376 break;
2377 case 16:
2378 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2379 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 PIPE_CONFIG(ADDR_SURF_P2) |
2381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 break;
2383 case 17:
2384 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386 PIPE_CONFIG(ADDR_SURF_P2) |
2387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 break;
2389 case 27:
2390 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2392 break;
2393 case 28:
2394 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2395 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2396 PIPE_CONFIG(ADDR_SURF_P2) |
2397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398 break;
2399 case 29:
2400 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2402 PIPE_CONFIG(ADDR_SURF_P2) |
2403 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404 break;
2405 case 30:
2406 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2408 PIPE_CONFIG(ADDR_SURF_P2) |
2409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 break;
2411 default:
2412 gb_tile_moden = 0;
2413 break;
2414 }
2415			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2416			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2417 }
2418 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2419 switch (reg_offset) {
2420 case 0:
2421 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2424 NUM_BANKS(ADDR_SURF_16_BANK));
2425 break;
2426 case 1:
2427 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2430 NUM_BANKS(ADDR_SURF_16_BANK));
2431 break;
2432 case 2:
2433 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2436 NUM_BANKS(ADDR_SURF_16_BANK));
2437 break;
2438 case 3:
2439 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2440 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2441 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2442 NUM_BANKS(ADDR_SURF_16_BANK));
2443 break;
2444 case 4:
2445 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2448 NUM_BANKS(ADDR_SURF_16_BANK));
2449 break;
2450 case 5:
2451 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2454 NUM_BANKS(ADDR_SURF_16_BANK));
2455 break;
2456 case 6:
2457 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460 NUM_BANKS(ADDR_SURF_8_BANK));
2461 break;
2462 case 8:
2463 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2466 NUM_BANKS(ADDR_SURF_16_BANK));
2467 break;
2468 case 9:
2469 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2472 NUM_BANKS(ADDR_SURF_16_BANK));
2473 break;
2474 case 10:
2475 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2478 NUM_BANKS(ADDR_SURF_16_BANK));
2479 break;
2480 case 11:
2481 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2484 NUM_BANKS(ADDR_SURF_16_BANK));
2485 break;
2486 case 12:
2487 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2490 NUM_BANKS(ADDR_SURF_16_BANK));
2491 break;
2492 case 13:
2493 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2496 NUM_BANKS(ADDR_SURF_16_BANK));
2497 break;
2498 case 14:
2499 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2502 NUM_BANKS(ADDR_SURF_8_BANK));
2503 break;
2504 default:
2505 gb_tile_moden = 0;
2506 break;
2507 }
2508 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2509 }
2510 } else
2511 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2512}
2513
2514/**
2515 * cik_select_se_sh - select which SE, SH to address
2516 *
2517 * @rdev: radeon_device pointer
2518 * @se_num: shader engine to address
2519 * @sh_num: sh block to address
2520 *
2521 * Select which SE, SH combinations to address. Certain
2522 * registers are instanced per SE or SH. 0xffffffff means
2523 * broadcast to all SEs or SHs (CIK).
2524 */
2525static void cik_select_se_sh(struct radeon_device *rdev,
2526 u32 se_num, u32 sh_num)
2527{
2528 u32 data = INSTANCE_BROADCAST_WRITES;
2529
2530 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2531		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2532	else if (se_num == 0xffffffff)
2533 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2534 else if (sh_num == 0xffffffff)
2535 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2536 else
2537 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2538 WREG32(GRBM_GFX_INDEX, data);
2539}
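
/* Typical usage (as in cik_setup_rb() below): select one SE/SH pair, touch
 * its instanced registers, then restore broadcast mode:
 *
 *	cik_select_se_sh(rdev, i, j);
 *	... read/write per-SE/SH registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */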
2540
2541/**
2542 * cik_create_bitmask - create a bitmask
2543 *
2544 * @bit_width: length of the mask
2545 *
2546 * Create a variable length bit mask (CIK).
2547 * Returns the bitmask.
2548 */
2549static u32 cik_create_bitmask(u32 bit_width)
2550{
2551 u32 i, mask = 0;
2552
2553 for (i = 0; i < bit_width; i++) {
2554 mask <<= 1;
2555 mask |= 1;
2556 }
2557 return mask;
2558}
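
/* Illustrative note (not part of the driver): for bit_width < 32 the loop
 * above is equivalent to the usual mask idiom:
 *
 *	u32 mask = (1u << bit_width) - 1;	e.g. cik_create_bitmask(4) == 0xf
 */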
2559
2560/**
2561 * cik_get_rb_disabled - compute the bitmask of disabled RBs
2562 *
2563 * @rdev: radeon_device pointer
2564 * @max_rb_num: max RBs (render backends) for the asic
2565 * @se_num: number of SEs (shader engines) for the asic
2566 * @sh_per_se: number of SH blocks per SE for the asic
2567 *
2568 * Calculates the bitmask of disabled RBs (CIK).
2569 * Returns the disabled RB bitmask.
2570 */
2571static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2572 u32 max_rb_num, u32 se_num,
2573 u32 sh_per_se)
2574{
2575 u32 data, mask;
2576
2577 data = RREG32(CC_RB_BACKEND_DISABLE);
2578 if (data & 1)
2579 data &= BACKEND_DISABLE_MASK;
2580 else
2581 data = 0;
2582 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2583
2584 data >>= BACKEND_DISABLE_SHIFT;
2585
2586 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2587
2588 return data & mask;
2589}
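
/* Worked example (hypothetical config): with max_rb_num = 4, se_num = 2 and
 * sh_per_se = 1, each SE/SH pair owns 4 / 2 / 1 = 2 RBs, so the returned
 * mask covers the low 2 bits of the per-SH backend-disable field.
 */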
2590
2591/**
2592 * cik_setup_rb - setup the RBs on the asic
2593 *
2594 * @rdev: radeon_device pointer
2595 * @se_num: number of SEs (shader engines) for the asic
2596 * @sh_per_se: number of SH blocks per SE for the asic
2597 * @max_rb_num: max RBs (render backends) for the asic
2598 *
2599 * Configures per-SE/SH RB registers (CIK).
2600 */
2601static void cik_setup_rb(struct radeon_device *rdev,
2602 u32 se_num, u32 sh_per_se,
2603 u32 max_rb_num)
2604{
2605 int i, j;
2606 u32 data, mask;
2607 u32 disabled_rbs = 0;
2608 u32 enabled_rbs = 0;
2609
2610 for (i = 0; i < se_num; i++) {
2611 for (j = 0; j < sh_per_se; j++) {
2612 cik_select_se_sh(rdev, i, j);
2613 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2614 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2615 }
2616 }
2617 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2618
2619 mask = 1;
2620 for (i = 0; i < max_rb_num; i++) {
2621 if (!(disabled_rbs & mask))
2622 enabled_rbs |= mask;
2623 mask <<= 1;
2624 }
2625
2626 for (i = 0; i < se_num; i++) {
2627 cik_select_se_sh(rdev, i, 0xffffffff);
2628 data = 0;
2629 for (j = 0; j < sh_per_se; j++) {
2630 switch (enabled_rbs & 3) {
2631 case 1:
2632 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2633 break;
2634 case 2:
2635 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2636 break;
2637 case 3:
2638 default:
2639 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2640 break;
2641 }
2642 enabled_rbs >>= 2;
2643 }
2644 WREG32(PA_SC_RASTER_CONFIG, data);
2645 }
2646 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2647}
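
/* Note on the RASTER_CONFIG loop above: each RB pair contributes a 2-bit
 * RB_MAP field, so enabled_rbs is consumed two bits per iteration; e.g.
 * (enabled_rbs & 3) == 3 (both RBs of the pair present) selects
 * RASTER_CONFIG_RB_MAP_2, while a single enabled RB picks map 0 or 3.
 */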
2648
2649/**
2650 * cik_gpu_init - setup the 3D engine
2651 *
2652 * @rdev: radeon_device pointer
2653 *
2654 * Configures the 3D engine and tiling configuration
2655 * registers so that the 3D engine is usable.
2656 */
2657static void cik_gpu_init(struct radeon_device *rdev)
2658{
2659 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2660 u32 mc_shared_chmap, mc_arb_ramcfg;
2661 u32 hdp_host_path_cntl;
2662 u32 tmp;
2663 int i, j;
2664
2665 switch (rdev->family) {
2666 case CHIP_BONAIRE:
2667 rdev->config.cik.max_shader_engines = 2;
2668 rdev->config.cik.max_tile_pipes = 4;
2669 rdev->config.cik.max_cu_per_sh = 7;
2670 rdev->config.cik.max_sh_per_se = 1;
2671 rdev->config.cik.max_backends_per_se = 2;
2672 rdev->config.cik.max_texture_channel_caches = 4;
2673 rdev->config.cik.max_gprs = 256;
2674 rdev->config.cik.max_gs_threads = 32;
2675 rdev->config.cik.max_hw_contexts = 8;
2676
2677 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2678 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2679 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2680 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2681 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2682 break;
2683 case CHIP_KAVERI:
2684 /* TODO */
2685 break;
2686 case CHIP_KABINI:
2687 default:
2688 rdev->config.cik.max_shader_engines = 1;
2689 rdev->config.cik.max_tile_pipes = 2;
2690 rdev->config.cik.max_cu_per_sh = 2;
2691 rdev->config.cik.max_sh_per_se = 1;
2692 rdev->config.cik.max_backends_per_se = 1;
2693 rdev->config.cik.max_texture_channel_caches = 2;
2694 rdev->config.cik.max_gprs = 256;
2695 rdev->config.cik.max_gs_threads = 16;
2696 rdev->config.cik.max_hw_contexts = 8;
2697
2698 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2699 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2700 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2701 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2702 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2703 break;
2704 }
2705
2706 /* Initialize HDP */
2707 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2708 WREG32((0x2c14 + j), 0x00000000);
2709 WREG32((0x2c18 + j), 0x00000000);
2710 WREG32((0x2c1c + j), 0x00000000);
2711 WREG32((0x2c20 + j), 0x00000000);
2712 WREG32((0x2c24 + j), 0x00000000);
2713 }
2714
2715 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2716
2717 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2718
2719 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2720 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2721
2722 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2723 rdev->config.cik.mem_max_burst_length_bytes = 256;
2724 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2725 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2726 if (rdev->config.cik.mem_row_size_in_kb > 4)
2727 rdev->config.cik.mem_row_size_in_kb = 4;
2728 /* XXX use MC settings? */
2729 rdev->config.cik.shader_engine_tile_size = 32;
2730 rdev->config.cik.num_gpus = 1;
2731 rdev->config.cik.multi_gpu_tile_size = 64;
2732
2733 /* fix up row size */
2734 gb_addr_config &= ~ROW_SIZE_MASK;
2735 switch (rdev->config.cik.mem_row_size_in_kb) {
2736 case 1:
2737 default:
2738 gb_addr_config |= ROW_SIZE(0);
2739 break;
2740 case 2:
2741 gb_addr_config |= ROW_SIZE(1);
2742 break;
2743 case 4:
2744 gb_addr_config |= ROW_SIZE(2);
2745 break;
2746 }
2747
2748 /* setup tiling info dword. gb_addr_config is not adequate since it does
2749 * not have bank info, so create a custom tiling dword.
2750 * bits 3:0 num_pipes
2751 * bits 7:4 num_banks
2752 * bits 11:8 group_size
2753 * bits 15:12 row_size
2754 */
2755 rdev->config.cik.tile_config = 0;
2756 switch (rdev->config.cik.num_tile_pipes) {
2757 case 1:
2758 rdev->config.cik.tile_config |= (0 << 0);
2759 break;
2760 case 2:
2761 rdev->config.cik.tile_config |= (1 << 0);
2762 break;
2763 case 4:
2764 rdev->config.cik.tile_config |= (2 << 0);
2765 break;
2766 case 8:
2767 default:
2768 /* XXX what about 12? */
2769 rdev->config.cik.tile_config |= (3 << 0);
2770 break;
2771 }
2772 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2773 rdev->config.cik.tile_config |= 1 << 4;
2774 else
2775 rdev->config.cik.tile_config |= 0 << 4;
2776 rdev->config.cik.tile_config |=
2777 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2778 rdev->config.cik.tile_config |=
2779 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
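	/* Worked example (hypothetical part): num_tile_pipes = 4 gives bits 3:0
	 * = 2, a multi-bank RAMCFG sets bit 4, so tile_config is 0x12 before
	 * the interleave and row-size fields are OR'ed into bits 11:8 and
	 * 15:12. */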
2780
2781 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2782 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2783 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2784	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2785 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2786	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2787 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2788 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2789
2790 cik_tiling_mode_table_init(rdev);
2791
2792 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2793 rdev->config.cik.max_sh_per_se,
2794 rdev->config.cik.max_backends_per_se);
2795
2796 /* set HW defaults for 3D engine */
2797 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2798
2799 WREG32(SX_DEBUG_1, 0x20);
2800
2801 WREG32(TA_CNTL_AUX, 0x00010000);
2802
2803 tmp = RREG32(SPI_CONFIG_CNTL);
2804 tmp |= 0x03000000;
2805 WREG32(SPI_CONFIG_CNTL, tmp);
2806
2807 WREG32(SQ_CONFIG, 1);
2808
2809 WREG32(DB_DEBUG, 0);
2810
2811 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2812 tmp |= 0x00000400;
2813 WREG32(DB_DEBUG2, tmp);
2814
2815 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2816 tmp |= 0x00020200;
2817 WREG32(DB_DEBUG3, tmp);
2818
2819 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2820 tmp |= 0x00018208;
2821 WREG32(CB_HW_CONTROL, tmp);
2822
2823 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2824
2825 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2826 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2827 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2828 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2829
2830 WREG32(VGT_NUM_INSTANCES, 1);
2831
2832 WREG32(CP_PERFMON_CNTL, 0);
2833
2834 WREG32(SQ_CONFIG, 0);
2835
2836 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2837 FORCE_EOV_MAX_REZ_CNT(255)));
2838
2839 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2840 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2841
2842 WREG32(VGT_GS_VERTEX_REUSE, 16);
2843 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2844
2845 tmp = RREG32(HDP_MISC_CNTL);
2846 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2847 WREG32(HDP_MISC_CNTL, tmp);
2848
2849 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2850 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2851
2852 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2853 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2854
2855 udelay(50);
2856}
2857
2858/*
2859 * GPU scratch register helper functions.
2860 */
2861/**
2862 * cik_scratch_init - setup driver info for CP scratch regs
2863 *
2864 * @rdev: radeon_device pointer
2865 *
2866 * Set up the number and offset of the CP scratch registers.
2867 * NOTE: use of CP scratch registers is a legacy interface and
2868 * is not used by default on newer asics (r6xx+). On newer asics,
2869 * memory buffers are used for fences rather than scratch regs.
2870 */
2871static void cik_scratch_init(struct radeon_device *rdev)
2872{
2873 int i;
2874
2875 rdev->scratch.num_reg = 7;
2876 rdev->scratch.reg_base = SCRATCH_REG0;
2877 for (i = 0; i < rdev->scratch.num_reg; i++) {
2878 rdev->scratch.free[i] = true;
2879 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2880 }
2881}
2882
2883/**
2884 * cik_ring_test - basic gfx ring test
2885 *
2886 * @rdev: radeon_device pointer
2887 * @ring: radeon_ring structure holding ring information
2888 *
2889 * Allocate a scratch register and write to it using the gfx ring (CIK).
2890 * Provides a basic gfx ring test to verify that the ring is working.
2891 * Used by cik_cp_gfx_resume();
2892 * Returns 0 on success, error on failure.
2893 */
2894int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2895{
2896 uint32_t scratch;
2897 uint32_t tmp = 0;
2898 unsigned i;
2899 int r;
2900
2901 r = radeon_scratch_get(rdev, &scratch);
2902 if (r) {
2903 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2904 return r;
2905 }
2906 WREG32(scratch, 0xCAFEDEAD);
2907 r = radeon_ring_lock(rdev, ring, 3);
2908 if (r) {
2909 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2910 radeon_scratch_free(rdev, scratch);
2911 return r;
2912 }
2913 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2914 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2915 radeon_ring_write(ring, 0xDEADBEEF);
2916 radeon_ring_unlock_commit(rdev, ring);
2917
2918	for (i = 0; i < rdev->usec_timeout; i++) {
2919 tmp = RREG32(scratch);
2920 if (tmp == 0xDEADBEEF)
2921 break;
2922 DRM_UDELAY(1);
2923 }
2924 if (i < rdev->usec_timeout) {
2925 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2926 } else {
2927 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2928 ring->idx, scratch, tmp);
2929 r = -EINVAL;
2930 }
2931 radeon_scratch_free(rdev, scratch);
2932 return r;
2933}
2934
2935/**
2936 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2937 *
2938 * @rdev: radeon_device pointer
2939 * @fence: radeon fence object
2940 *
2941 * Emits a fence sequence number on the gfx ring and flushes
2942 * GPU caches.
2943 */
2944void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2945 struct radeon_fence *fence)
2946{
2947 struct radeon_ring *ring = &rdev->ring[fence->ring];
2948 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2949
2950 /* EVENT_WRITE_EOP - flush caches, send int */
2951 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2952 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2953 EOP_TC_ACTION_EN |
2954 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2955 EVENT_INDEX(5)));
2956 radeon_ring_write(ring, addr & 0xfffffffc);
2957 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2958 radeon_ring_write(ring, fence->seq);
2959 radeon_ring_write(ring, 0);
2960 /* HDP flush */
2961 /* We should be using the new WAIT_REG_MEM special op packet here
2962 * but it causes the CP to hang
2963 */
2964 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2965 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2966 WRITE_DATA_DST_SEL(0)));
2967 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2968 radeon_ring_write(ring, 0);
2969 radeon_ring_write(ring, 0);
2970}
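
/* Packet note: the EVENT_WRITE_EOP above carries the 64-bit fence address
 * split across two dwords; as used here, DATA_SEL(1) selects a 32-bit data
 * write (the fence seq) and INT_SEL(2) requests an interrupt once that
 * write has completed.
 */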
2971
2972/**
2973 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2974 *
2975 * @rdev: radeon_device pointer
2976 * @fence: radeon fence object
2977 *
2978 * Emits a fence sequence number on the compute ring and flushes
2979 * GPU caches.
2980 */
2981void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2982 struct radeon_fence *fence)
2983{
2984 struct radeon_ring *ring = &rdev->ring[fence->ring];
2985 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2986
2987 /* RELEASE_MEM - flush caches, send int */
2988 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2989 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2990 EOP_TC_ACTION_EN |
2991 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2992 EVENT_INDEX(5)));
2993 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2994 radeon_ring_write(ring, addr & 0xfffffffc);
2995 radeon_ring_write(ring, upper_32_bits(addr));
2996 radeon_ring_write(ring, fence->seq);
2997 radeon_ring_write(ring, 0);
2998 /* HDP flush */
2999 /* We should be using the new WAIT_REG_MEM special op packet here
3000 * but it causes the CP to hang
3001 */
3002 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3003 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3004 WRITE_DATA_DST_SEL(0)));
3005 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3006 radeon_ring_write(ring, 0);
3007 radeon_ring_write(ring, 0);
3008}
3009
3010void cik_semaphore_ring_emit(struct radeon_device *rdev,
3011 struct radeon_ring *ring,
3012 struct radeon_semaphore *semaphore,
3013 bool emit_wait)
3014{
3015 uint64_t addr = semaphore->gpu_addr;
3016 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3017
3018 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3019 radeon_ring_write(ring, addr & 0xffffffff);
3020 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3021}
3022
3023/*
3024 * IB stuff
3025 */
3026/**
3027 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3028 *
3029 * @rdev: radeon_device pointer
3030 * @ib: radeon indirect buffer object
3031 *
3032 * Emits a DE (drawing engine) or CE (constant engine) IB
3033 * on the gfx ring. IBs are usually generated by userspace
3034 * acceleration drivers and submitted to the kernel for
3035 * scheduling on the ring. This function schedules the IB
3036 * on the gfx ring for execution by the GPU.
3037 */
3038void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3039{
3040 struct radeon_ring *ring = &rdev->ring[ib->ring];
3041 u32 header, control = INDIRECT_BUFFER_VALID;
3042
3043 if (ib->is_const_ib) {
3044 /* set switch buffer packet before const IB */
3045 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3046 radeon_ring_write(ring, 0);
3047
3048 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3049 } else {
3050 u32 next_rptr;
3051 if (ring->rptr_save_reg) {
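			/* 3 dwords for the SET_UCONFIG_REG write below plus
			 * 4 for the INDIRECT_BUFFER packet emitted at the end */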
3052 next_rptr = ring->wptr + 3 + 4;
3053 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3054 radeon_ring_write(ring, ((ring->rptr_save_reg -
3055 PACKET3_SET_UCONFIG_REG_START) >> 2));
3056 radeon_ring_write(ring, next_rptr);
3057 } else if (rdev->wb.enabled) {
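			/* 5 dwords for the WRITE_DATA packet below plus 4 for
			 * the INDIRECT_BUFFER packet emitted at the end */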
3058 next_rptr = ring->wptr + 5 + 4;
3059 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3060 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3061 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3062 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3063 radeon_ring_write(ring, next_rptr);
3064 }
3065
3066 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3067 }
3068
3069 control |= ib->length_dw |
3070 (ib->vm ? (ib->vm->id << 24) : 0);
3071
3072 radeon_ring_write(ring, header);
3073 radeon_ring_write(ring,
3074#ifdef __BIG_ENDIAN
3075 (2 << 0) |
3076#endif
3077 (ib->gpu_addr & 0xFFFFFFFC));
3078 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3079 radeon_ring_write(ring, control);
3080}
3081
3082/**
3083 * cik_ib_test - basic gfx ring IB test
3084 *
3085 * @rdev: radeon_device pointer
3086 * @ring: radeon_ring structure holding ring information
3087 *
3088 * Allocate an IB and execute it on the gfx ring (CIK).
3089 * Provides a basic gfx ring test to verify that IBs are working.
3090 * Returns 0 on success, error on failure.
3091 */
3092int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3093{
3094 struct radeon_ib ib;
3095 uint32_t scratch;
3096 uint32_t tmp = 0;
3097 unsigned i;
3098 int r;
3099
3100 r = radeon_scratch_get(rdev, &scratch);
3101 if (r) {
3102 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3103 return r;
3104 }
3105 WREG32(scratch, 0xCAFEDEAD);
3106 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3107 if (r) {
3108 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3109 return r;
3110 }
3111 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3112 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3113 ib.ptr[2] = 0xDEADBEEF;
3114 ib.length_dw = 3;
3115 r = radeon_ib_schedule(rdev, &ib, NULL);
3116 if (r) {
3117 radeon_scratch_free(rdev, scratch);
3118 radeon_ib_free(rdev, &ib);
3119 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3120 return r;
3121 }
3122 r = radeon_fence_wait(ib.fence, false);
3123 if (r) {
3124 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3125 return r;
3126 }
3127 for (i = 0; i < rdev->usec_timeout; i++) {
3128 tmp = RREG32(scratch);
3129 if (tmp == 0xDEADBEEF)
3130 break;
3131 DRM_UDELAY(1);
3132 }
3133 if (i < rdev->usec_timeout) {
3134 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3135 } else {
3136 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3137 scratch, tmp);
3138 r = -EINVAL;
3139 }
3140 radeon_scratch_free(rdev, scratch);
3141 radeon_ib_free(rdev, &ib);
3142 return r;
3143}
3144
3145/*
3146 * CP.
3147 * On CIK, gfx and compute now have independent command processors.
3148 *
3149 * GFX
3150 * Gfx consists of a single ring and can process both gfx jobs and
3151 * compute jobs. The gfx CP consists of three microengines (ME):
3152 * PFP - Pre-Fetch Parser
3153 * ME - Micro Engine
3154 * CE - Constant Engine
3155 * The PFP and ME make up what is considered the Drawing Engine (DE).
3156 * The CE is an asynchronous engine used for updating buffer descriptors
3157 * used by the DE so that they can be loaded into cache in parallel
3158 * while the DE is processing state update packets.
3159 *
3160 * Compute
3161 * The compute CP consists of two microengines (ME):
3162 * MEC1 - Compute MicroEngine 1
3163 * MEC2 - Compute MicroEngine 2
3164 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3165 * The queues are exposed to userspace and are programmed directly
3166 * by the compute runtime.
3167 */
3168/**
3169 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3170 *
3171 * @rdev: radeon_device pointer
3172 * @enable: enable or disable the MEs
3173 *
3174 * Halts or unhalts the gfx MEs.
3175 */
3176static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3177{
3178 if (enable)
3179 WREG32(CP_ME_CNTL, 0);
3180 else {
3181 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3182 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3183 }
3184 udelay(50);
3185}
3186
3187/**
3188 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3189 *
3190 * @rdev: radeon_device pointer
3191 *
3192 * Loads the gfx PFP, ME, and CE ucode.
3193 * Returns 0 for success, -EINVAL if the ucode is not available.
3194 */
3195static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3196{
3197 const __be32 *fw_data;
3198 int i;
3199
3200 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3201 return -EINVAL;
3202
3203 cik_cp_gfx_enable(rdev, false);
3204
3205 /* PFP */
3206 fw_data = (const __be32 *)rdev->pfp_fw->data;
3207 WREG32(CP_PFP_UCODE_ADDR, 0);
3208 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3209 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3210 WREG32(CP_PFP_UCODE_ADDR, 0);
3211
3212 /* CE */
3213 fw_data = (const __be32 *)rdev->ce_fw->data;
3214 WREG32(CP_CE_UCODE_ADDR, 0);
3215 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3216 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3217 WREG32(CP_CE_UCODE_ADDR, 0);
3218
3219 /* ME */
3220 fw_data = (const __be32 *)rdev->me_fw->data;
3221 WREG32(CP_ME_RAM_WADDR, 0);
3222 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3223 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3224 WREG32(CP_ME_RAM_WADDR, 0);
3225
3226 WREG32(CP_PFP_UCODE_ADDR, 0);
3227 WREG32(CP_CE_UCODE_ADDR, 0);
3228 WREG32(CP_ME_RAM_WADDR, 0);
3229 WREG32(CP_ME_RAM_RADDR, 0);
3230 return 0;
3231}
3232
3233/**
3234 * cik_cp_gfx_start - start the gfx ring
3235 *
3236 * @rdev: radeon_device pointer
3237 *
3238 * Enables the ring and loads the clear state context and other
3239 * packets required to init the ring.
3240 * Returns 0 for success, error for failure.
3241 */
3242static int cik_cp_gfx_start(struct radeon_device *rdev)
3243{
3244 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3245 int r, i;
3246
3247 /* init the CP */
3248 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3249 WREG32(CP_ENDIAN_SWAP, 0);
3250 WREG32(CP_DEVICE_ID, 1);
3251
3252 cik_cp_gfx_enable(rdev, true);
3253
3254 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3255 if (r) {
3256 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3257 return r;
3258 }
3259
3260 /* init the CE partitions. CE only used for gfx on CIK */
3261 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3262 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3263 radeon_ring_write(ring, 0xc000);
3264 radeon_ring_write(ring, 0xc000);
3265
3266 /* setup clear context state */
3267 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3268 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3269
3270 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3271 radeon_ring_write(ring, 0x80000000);
3272 radeon_ring_write(ring, 0x80000000);
3273
3274 for (i = 0; i < cik_default_size; i++)
3275 radeon_ring_write(ring, cik_default_state[i]);
3276
3277 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3278 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3279
3280 /* set clear context state */
3281 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3282 radeon_ring_write(ring, 0);
3283
3284 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3285 radeon_ring_write(ring, 0x00000316);
3286 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3287 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3288
3289 radeon_ring_unlock_commit(rdev, ring);
3290
3291 return 0;
3292}
3293
3294/**
3295 * cik_cp_gfx_fini - stop the gfx ring
3296 *
3297 * @rdev: radeon_device pointer
3298 *
3299 * Stop the gfx ring and tear down the driver ring
3300 * info.
3301 */
3302static void cik_cp_gfx_fini(struct radeon_device *rdev)
3303{
3304 cik_cp_gfx_enable(rdev, false);
3305 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3306}
3307
3308/**
3309 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3310 *
3311 * @rdev: radeon_device pointer
3312 *
3313 * Program the location and size of the gfx ring buffer
3314 * and test it to make sure it's working.
3315 * Returns 0 for success, error for failure.
3316 */
3317static int cik_cp_gfx_resume(struct radeon_device *rdev)
3318{
3319 struct radeon_ring *ring;
3320 u32 tmp;
3321 u32 rb_bufsz;
3322 u64 rb_addr;
3323 int r;
3324
3325 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3326 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3327
3328 /* Set the write pointer delay */
3329 WREG32(CP_RB_WPTR_DELAY, 0);
3330
3331 /* set the RB to use vmid 0 */
3332 WREG32(CP_RB_VMID, 0);
3333
3334 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3335
3336 /* ring 0 - compute and gfx */
3337 /* Set ring buffer size */
3338 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
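	/* CP_RB0_CNTL takes log2-encoded sizes: drm_order() returns the base-2
	 * log, so the ring size is programmed as a power-of-two exponent. */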
3339 rb_bufsz = drm_order(ring->ring_size / 8);
3340 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3341#ifdef __BIG_ENDIAN
3342 tmp |= BUF_SWAP_32BIT;
3343#endif
3344 WREG32(CP_RB0_CNTL, tmp);
3345
3346 /* Initialize the ring buffer's read and write pointers */
3347 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3348 ring->wptr = 0;
3349 WREG32(CP_RB0_WPTR, ring->wptr);
3350
3351	/* set the wb address whether it's enabled or not */
3352 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3353 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3354
3355 /* scratch register shadowing is no longer supported */
3356 WREG32(SCRATCH_UMSK, 0);
3357
3358 if (!rdev->wb.enabled)
3359 tmp |= RB_NO_UPDATE;
3360
3361 mdelay(1);
3362 WREG32(CP_RB0_CNTL, tmp);
3363
3364 rb_addr = ring->gpu_addr >> 8;
3365 WREG32(CP_RB0_BASE, rb_addr);
3366 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3367
3368 ring->rptr = RREG32(CP_RB0_RPTR);
3369
3370 /* start the ring */
3371 cik_cp_gfx_start(rdev);
3372 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3373 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3374 if (r) {
3375 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3376 return r;
3377 }
3378 return 0;
3379}
3380
3381u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3382 struct radeon_ring *ring)
3383{
3384 u32 rptr;
3385
3388 if (rdev->wb.enabled) {
3389 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3390 } else {
3391		mutex_lock(&rdev->srbm_mutex);
3392		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3393 rptr = RREG32(CP_HQD_PQ_RPTR);
3394 cik_srbm_select(rdev, 0, 0, 0, 0);
3395		mutex_unlock(&rdev->srbm_mutex);
3396	}
3397 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3398
3399 return rptr;
3400}
3401
3402u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3403 struct radeon_ring *ring)
3404{
3405 u32 wptr;
3406
3407 if (rdev->wb.enabled) {
3408 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3409 } else {
3410		mutex_lock(&rdev->srbm_mutex);
3411		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3412 wptr = RREG32(CP_HQD_PQ_WPTR);
3413 cik_srbm_select(rdev, 0, 0, 0, 0);
3414		mutex_unlock(&rdev->srbm_mutex);
3415	}
3416 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3417
3418 return wptr;
3419}
3420
3421void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3422 struct radeon_ring *ring)
3423{
3424 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
3425
3426 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
3427 WDOORBELL32(ring->doorbell_offset, wptr);
3428}
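
/* Unlike the gfx ring, compute queue pointers live in per-queue HQD
 * registers reached through SRBM, so the accessors above either use the
 * writeback buffer or take srbm_mutex around cik_srbm_select().
 */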
3429
3430/**
3431 * cik_cp_compute_enable - enable/disable the compute CP MEs
3432 *
3433 * @rdev: radeon_device pointer
3434 * @enable: enable or disable the MEs
3435 *
3436 * Halts or unhalts the compute MEs.
3437 */
3438static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3439{
3440 if (enable)
3441 WREG32(CP_MEC_CNTL, 0);
3442 else
3443 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3444 udelay(50);
3445}
3446
3447/**
3448 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3449 *
3450 * @rdev: radeon_device pointer
3451 *
3452 * Loads the compute MEC1&2 ucode.
3453 * Returns 0 for success, -EINVAL if the ucode is not available.
3454 */
3455static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3456{
3457 const __be32 *fw_data;
3458 int i;
3459
3460 if (!rdev->mec_fw)
3461 return -EINVAL;
3462
3463 cik_cp_compute_enable(rdev, false);
3464
3465 /* MEC1 */
3466 fw_data = (const __be32 *)rdev->mec_fw->data;
3467 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3468 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3469 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3470 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3471
3472 if (rdev->family == CHIP_KAVERI) {
3473 /* MEC2 */
3474 fw_data = (const __be32 *)rdev->mec_fw->data;
3475 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3476 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3477 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3478 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3479 }
3480
3481 return 0;
3482}
3483
3484/**
3485 * cik_cp_compute_start - start the compute queues
3486 *
3487 * @rdev: radeon_device pointer
3488 *
3489 * Enable the compute queues.
3490 * Returns 0 for success, error for failure.
3491 */
3492static int cik_cp_compute_start(struct radeon_device *rdev)
3493{
Alex Deucher963e81f2013-06-26 17:37:11 -04003494 cik_cp_compute_enable(rdev, true);
3495
Alex Deucher841cf442012-12-18 21:47:44 -05003496 return 0;
3497}
3498
3499/**
3500 * cik_cp_compute_fini - stop the compute queues
3501 *
3502 * @rdev: radeon_device pointer
3503 *
3504 * Stop the compute queues and tear down the driver queue
3505 * info.
3506 */
3507static void cik_cp_compute_fini(struct radeon_device *rdev)
3508{
Alex Deucher963e81f2013-06-26 17:37:11 -04003509 int i, idx, r;
3510
Alex Deucher841cf442012-12-18 21:47:44 -05003511 cik_cp_compute_enable(rdev, false);
Alex Deucher963e81f2013-06-26 17:37:11 -04003512
3513 for (i = 0; i < 2; i++) {
3514 if (i == 0)
3515 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3516 else
3517 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3518
3519 if (rdev->ring[idx].mqd_obj) {
3520 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3521 if (unlikely(r != 0))
3522 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3523
3524 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3525 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3526
3527 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3528 rdev->ring[idx].mqd_obj = NULL;
3529 }
3530 }
Alex Deucher841cf442012-12-18 21:47:44 -05003531}
3532
Alex Deucher963e81f2013-06-26 17:37:11 -04003533static void cik_mec_fini(struct radeon_device *rdev)
3534{
3535 int r;
3536
3537 if (rdev->mec.hpd_eop_obj) {
3538 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3539 if (unlikely(r != 0))
3540 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3541 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3542 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3543
3544 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3545 rdev->mec.hpd_eop_obj = NULL;
3546 }
3547}
3548
3549#define MEC_HPD_SIZE 2048
3550
3551static int cik_mec_init(struct radeon_device *rdev)
3552{
3553 int r;
3554 u32 *hpd;
3555
3556 /*
3557 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3558 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3559 */
3560 if (rdev->family == CHIP_KAVERI)
3561 rdev->mec.num_mec = 2;
3562 else
3563 rdev->mec.num_mec = 1;
3564 rdev->mec.num_pipe = 4;
3565 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3566
3567 if (rdev->mec.hpd_eop_obj == NULL) {
3568 r = radeon_bo_create(rdev,
3569 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3570 PAGE_SIZE, true,
3571 RADEON_GEM_DOMAIN_GTT, NULL,
3572 &rdev->mec.hpd_eop_obj);
3573 if (r) {
3574 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
3575 return r;
3576 }
3577 }
3578
3579 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3580 if (unlikely(r != 0)) {
3581 cik_mec_fini(rdev);
3582 return r;
3583 }
3584 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3585 &rdev->mec.hpd_eop_gpu_addr);
3586 if (r) {
3587 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
3588 cik_mec_fini(rdev);
3589 return r;
3590 }
3591 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3592 if (r) {
3593 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
3594 cik_mec_fini(rdev);
3595 return r;
3596 }
3597
3598 /* clear memory. Not sure if this is required or not */
3599 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3600
3601 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3602 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3603
3604 return 0;
3605}
3606
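/*
 * Worked example (illustrative only): with the topology set up in
 * cik_mec_init(), Kaveri has 2 MECs * 4 pipes = 8 pipes and thus
 * allocates 8 * MEC_HPD_SIZE * 2 = 32KB of HPD EOP space; Bonaire
 * and Kabini allocate half of that.  This hypothetical helper just
 * restates the allocation size used above.
 */
static inline unsigned cik_example_hpd_eop_bytes(struct radeon_device *rdev)
{
	return rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2;
}
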
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};

struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};

/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);

		cik_srbm_select(rdev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= drm_order(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, NULL,
					     &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* doorbell offset */
		rdev->ring[idx].doorbell_offset =
			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* use a separate counter here so the outer queue
			 * loop index is not clobbered */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			drm_order(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);
		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}

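/*
 * Illustrative sketch (not part of the driver): the pipe init loop in
 * cik_cp_compute_resume() walks a linear pipe index and derives the
 * (ME, pipe) pair from it: indices 0-3 map to ME1 pipes 0-3 and
 * indices 4-7 map to ME2 pipes 0-3.  A hypothetical decoder:
 */
static inline void cik_example_decode_pipe(int index, int *me, int *pipe)
{
	*me = (index < 4) ? 1 : 2;
	*pipe = (index < 4) ? index : index - 4;
}
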
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}

static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}

static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}

static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset all cp blocks */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	return 0;
}

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP.  sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
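/*
 * Illustrative note: the sDMA packet headers used below are a single
 * dword; per the SDMA_PACKET() macro in cikd.h the opcode sits in
 * bits 7:0, the sub-opcode in bits 15:8 and packet-specific extra
 * bits in 31:16.  A hypothetical open-coded version of that encoding:
 */
static inline u32 cik_example_sdma_pkt(u32 op, u32 sub_op, u32 extra)
{
	return ((extra & 0xffff) << 16) | ((sub_op & 0xff) << 8) | (op & 0xff);
}
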
/**
 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (CIK).
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
			      struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 5;
		while ((next_rptr & 7) != 4)
			next_rptr++;
		next_rptr += 4;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
		radeon_ring_write(ring, 1); /* number of DWs to follow */
		radeon_ring_write(ring, next_rptr);
	}

	/* IB packet must end on an 8 DW boundary */
	while ((ring->wptr & 7) != 4)
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, ib->length_dw);
}

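/*
 * Worked example (illustrative only): the NOP padding loop in
 * cik_sdma_ring_ib_execute() advances wptr until (wptr & 7) == 4 so
 * that the 4-dword INDIRECT_BUFFER packet ends on an 8-dword
 * boundary.  The equivalent pad count, computed arithmetically:
 */
static inline u32 cik_example_sdma_ib_pad(u32 wptr)
{
	/* e.g. wptr & 7 == 5 needs 7 NOPs, == 4 needs none */
	return (4 - (wptr & 7)) & 7;
}
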
/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write the fence seq number
 * and a DMA trap packet to generate an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* write the fence */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	radeon_ring_write(ring, fence->seq);
	/* generate an interrupt */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
}

/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (CIK).
 */
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
	radeon_ring_write(ring, addr & 0xfffffff8);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
}

/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
	u32 rb_cntl, reg_offset;
	int i;

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
		rb_cntl &= ~SDMA_RB_ENABLE;
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
	}
}

/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}

/**
 * cik_sdma_enable - enable/disable the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}
}

/**
 * cik_sdma_gfx_resume - setup and start the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

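/*
 * Illustrative sketch (not part of the driver): SDMA0_GFX_RB_CNTL
 * above encodes log2(ring size in dwords) shifted left by one, so a
 * 64KB ring gives drm_order(65536 / 4) = 14, written as 14 << 1.
 * A hypothetical helper restating that encoding:
 */
static inline u32 cik_example_sdma_rb_cntl(u32 ring_size_bytes)
{
	return drm_order(ring_size_bytes / 4) << 1;
}
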
/**
 * cik_sdma_rlc_resume - setup and start the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}

/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->sdma_fw)
		return -EINVAL;

	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

	/* sdma0 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	/* sdma1 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	return 0;
}

/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);
	RREG32(SRBM_SOFT_RESET);

	r = cik_sdma_load_microcode(rdev);
	if (r)
		return r;

	/* unhalt the MEs */
	cik_sdma_enable(rdev, true);

	/* start the gfx rings and rlc compute queues */
	r = cik_sdma_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_sdma_rlc_resume(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
static void cik_sdma_fini(struct radeon_device *rdev)
{
	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);
	/* halt the MEs */
	cik_sdma_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
	/* XXX - compute dma queue tear down */
}

/**
 * cik_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU pages using the DMA engine (CIK).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int cik_copy_dma(struct radeon_device *rdev,
		 uint64_t src_offset, uint64_t dst_offset,
		 unsigned num_gpu_pages,
		 struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	if (radeon_fence_need_sync(*fence, ring->idx)) {
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
					    ring->idx);
		radeon_fence_note_sync(*fence, ring->idx);
	} else {
		radeon_semaphore_free(rdev, &sem, NULL);
	}

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, cur_size_in_bytes);
		radeon_ring_write(ring, 0); /* src/dst endian swap */
		radeon_ring_write(ring, src_offset & 0xffffffff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}

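/*
 * Worked example (illustrative only): each sDMA copy packet moves at
 * most 0x1fffff bytes, so cik_copy_dma() splits e.g. a 4MB copy into
 * DIV_ROUND_UP(4194304, 0x1fffff) = 3 packets of 7 dwords each,
 * which is why it reserves num_loops * 7 + 14 ring dwords.
 */
static inline unsigned cik_example_dma_loops(unsigned num_gpu_pages)
{
	unsigned size_in_bytes = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;

	return DIV_ROUND_UP(size_in_bytes, 0x1fffff);
}
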
/**
 * cik_sdma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ring_lock(rdev, ring, 4);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

/**
 * cik_sdma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp = 0;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
	ib.ptr[3] = 1;
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		/* free the IB on this error path too, otherwise it leaks */
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}

static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		 RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		 RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		 RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		 RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}

/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}

/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}

	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}

/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}

/**
 * cik_gfx_is_lockup - check if the 3D engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the 3D engine is locked up (CIK).
 * Returns true if the engine is locked, false if not.
 */
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!(reset_mask & (RADEON_RESET_GFX |
			    RADEON_RESET_COMPUTE |
			    RADEON_RESET_CP))) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cik_sdma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up (CIK).
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	/* Lockout access through VGA aperture */
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}

/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM information */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* CONFIG_MEMSIZE reports the vram size in MB */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}

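/*
 * Illustrative sketch (not part of the driver): cik_mc_init() derives
 * the vram width from MC_SHARED_CHMAP; e.g. NOOFCHAN = 2 means 4
 * channels, which with a 64-bit chansize gives a 256-bit bus.  A
 * hypothetical table-based version of the switch above:
 */
static inline int cik_example_numchan(u32 noofchan)
{
	static const int numchan[] = { 1, 2, 4, 8, 3, 6, 10, 12, 16 };

	return (noofchan < ARRAY_SIZE(numchan)) ? numchan[noofchan] : 1;
}
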
/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts 0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}

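/*
 * Illustrative example (hypothetical helper, not part of the driver):
 * since VM_INVALIDATE_REQUEST takes a bitmask of VM contexts, a
 * single-VMID flush would in principle look like this:
 */
static inline void cik_example_flush_vmid(struct radeon_device *rdev, int vmid)
{
	WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
}
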
5022/**
5023 * cik_pcie_gart_enable - gart enable
5024 *
5025 * @rdev: radeon_device pointer
5026 *
5027 * This sets up the TLBs, programs the page tables for VMID0,
5028 * sets up the hw for VMIDs 1-15 which are allocated on
5029 * demand, and sets up the global locations for the LDS, GDS,
5030 * and GPUVM for FSA64 clients (CIK).
5031 * Returns 0 for success, errors for failure.
5032 */
5033static int cik_pcie_gart_enable(struct radeon_device *rdev)
5034{
5035 int r, i;
5036
5037 if (rdev->gart.robj == NULL) {
5038 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5039 return -EINVAL;
5040 }
5041 r = radeon_gart_table_vram_pin(rdev);
5042 if (r)
5043 return r;
5044 radeon_gart_restore(rdev);
5045 /* Setup TLB control */
5046 WREG32(MC_VM_MX_L1_TLB_CNTL,
5047 (0xA << 7) |
5048 ENABLE_L1_TLB |
5049 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5050 ENABLE_ADVANCED_DRIVER_MODEL |
5051 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5052 /* Setup L2 cache */
5053 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5054 ENABLE_L2_FRAGMENT_PROCESSING |
5055 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5056 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5057 EFFECTIVE_L2_QUEUE_SIZE(7) |
5058 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5059 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5060 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5061 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5062 /* setup context0 */
5063 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5064 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5065 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5066 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5067 (u32)(rdev->dummy_page.addr >> 12));
5068 WREG32(VM_CONTEXT0_CNTL2, 0);
5069 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5070 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5071
5072 WREG32(0x15D4, 0);
5073 WREG32(0x15D8, 0);
5074 WREG32(0x15DC, 0);
5075
5076 /* empty context1-15 */
5077 /* FIXME start with 4G, once using 2 level pt switch to full
5078 * vm size space
5079 */
5080 /* set vm size, must be a multiple of 4 */
5081 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5082 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5083 for (i = 1; i < 16; i++) {
5084 if (i < 8)
5085 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5086 rdev->gart.table_addr >> 12);
5087 else
5088 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5089 rdev->gart.table_addr >> 12);
5090 }
5091
5092 /* enable context1-15 */
5093 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5094 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04005095 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04005096 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04005097 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5098 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5099 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5100 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5101 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5102 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5103 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5104 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5105 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5106 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5107 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5108 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04005109
5110 /* TC cache setup ??? */
5111 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5112 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5113 WREG32(TC_CFG_L1_STORE_POLICY, 0);
5114
5115 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5116 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5117 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5118 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5119 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5120
5121 WREG32(TC_CFG_L1_VOLATILE, 0);
5122 WREG32(TC_CFG_L2_VOLATILE, 0);
5123
5124 if (rdev->family == CHIP_KAVERI) {
5125 u32 tmp = RREG32(CHUB_CONTROL);
5126 tmp &= ~BYPASS_VM;
5127 WREG32(CHUB_CONTROL, tmp);
5128 }
5129
5130 /* XXX SH_MEM regs */
5131 /* where to put LDS, scratch, GPUVM in FSA64 space */
Alex Deucherf61d5b462013-08-06 12:40:16 -04005132 mutex_lock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04005133 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05005134 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04005135 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04005136 WREG32(SH_MEM_CONFIG, 0);
5137 WREG32(SH_MEM_APE1_BASE, 1);
5138 WREG32(SH_MEM_APE1_LIMIT, 0);
5139 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04005140 /* SDMA GFX */
5141 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5142 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5143 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5144 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5145 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04005146 }
Alex Deucherb556b122013-01-29 10:44:22 -05005147 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04005148 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04005149
5150 cik_pcie_gart_tlb_flush(rdev);
5151 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5152 (unsigned)(rdev->mc.gtt_size >> 20),
5153 (unsigned long long)rdev->gart.table_addr);
5154 rdev->gart.ready = true;
5155 return 0;
5156}
5157
5158/**
5159 * cik_pcie_gart_disable - gart disable
5160 *
5161 * @rdev: radeon_device pointer
5162 *
5163 * This disables all VM page table (CIK).
5164 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}

/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}

/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}

/*
 * vm
 * VMID 0 is the physical GPU address space as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
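
/*
 * Illustrative sketch (not part of the driver): how a VMID selects its page
 * table base register, mirroring the logic in cik_vm_flush() and
 * cik_dma_vm_flush() below. VMIDs 0-7 sit in the VM_CONTEXT0 register bank
 * and VMIDs 8-15 in the VM_CONTEXT8 bank; consecutive VMIDs are one dword
 * (4 bytes) apart. The helper name is hypothetical.
 */
static inline u32 cik_vmid_pt_base_reg(unsigned vmid)
{
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}
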
/**
 * cik_vm_init - cik vm init callback
 *
 * @rdev: radeon_device pointer
 *
 * Inits cik specific vm parameters (number of VMs, base of vram for
 * VMIDs 1-15) (CIK).
 * Returns 0 for success.
 */
int cik_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	if (rdev->flags & RADEON_IS_IGP) {
		u64 tmp = RREG32(MC_VM_FB_OFFSET);
		tmp <<= 22;
		rdev->vm_manager.vram_base_offset = tmp;
	} else
		rdev->vm_manager.vram_base_offset = 0;

	return 0;
}

/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}

/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block = (char *)&mc_client;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}

/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts 0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}

/**
 * cik_vm_set_page - update the page tables using CP or sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK).
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = ndw;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count;
				if (ndw > 0x7FFFF)
					ndw = 0x7FFFF;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
				pe += ndw * 8;
				addr += ndw * incr;
				count -= ndw;
			}
		}
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
	}
}

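/*
 * Illustrative sketch (not part of the driver): a hypothetical caller of
 * cik_vm_set_page() filling an IB with PTEs for 16 system pages, one PTE
 * per 4 KiB page. The radeon vm code normally drives this; the
 * RADEON_VM_PAGE_* access flags and RADEON_GPU_PAGE_SIZE are assumed to
 * come from radeon.h, and the helper name is hypothetical.
 */
static inline void cik_vm_set_page_example(struct radeon_device *rdev,
					   struct radeon_ib *ib,
					   uint64_t pe, uint64_t addr)
{
	/* incr of one GPU page advances addr by 4 KiB per entry */
	cik_vm_set_page(rdev, ib, pe, addr, 16, RADEON_GPU_PAGE_SIZE,
			RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_READABLE |
			RADEON_VM_PAGE_SYSTEM);
}
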
/**
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (vm == NULL)
		return;

	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm->id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */

	/* flush TLB */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
}

/*
 * RLC
 * The RLC is a multi-purpose microengine that handles a
 * variety of functions, the most important of which is
 * the interrupt controller.
 */
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
}

static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 tmp;

	tmp = RREG32(RLC_LB_CNTL);
	if (enable)
		tmp |= LOAD_BALANCE_ENABLE;
	else
		tmp &= ~LOAD_BALANCE_ENABLE;
	WREG32(RLC_LB_CNTL, tmp);
}

static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
{
	u32 tmp;

	tmp = RREG32(RLC_CNTL);
	if (tmp != rlc)
		WREG32(RLC_CNTL, rlc);
}

static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}

void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}

void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);
}

/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}

/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}

/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		size = BONAIRE_RLC_UCODE_SIZE;
		break;
	case CHIP_KAVERI:
		size = KV_RLC_UCODE_SIZE;
		break;
	case CHIP_KABINI:
		size = KB_RLC_UCODE_SIZE;
		break;
	}

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}

static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	cik_enable_gui_idle_interrupt(rdev, enable);

	if (enable) {
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}

static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable) {
		orig = data = RREG32(CP_MEM_SLP_CNTL);
		data |= CP_MEM_LS_EN;
		if (orig != data)
			WREG32(CP_MEM_SLP_CNTL, data);

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data &= ~SM_MODE_MASK;
		data |= SM_MODE(0x2);
		data |= SM_MODE_ENABLE;
		data &= ~CGTS_OVERRIDE;
		data &= ~CGTS_LS_OVERRIDE;
		data &= ~ON_MONITOR_ADD_MASK;
		data |= ON_MONITOR_ADD_EN;
		data |= ON_MONITOR_ADD(0x96);
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}

static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};

static void cik_enable_mc_ls(struct radeon_device *rdev,
			     bool enable)
{
	int i;
	u32 orig, data;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (enable)
			data |= MC_LS_ENABLE;
		else
			data &= ~MC_LS_ENABLE;
		if (data != orig)
			WREG32(mc_cg_registers[i], data);
	}
}

static void cik_enable_mc_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	int i;
	u32 orig, data;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (enable)
			data |= MC_CG_ENABLE;
		else
			data &= ~MC_CG_ENABLE;
		if (data != orig)
			WREG32(mc_cg_registers[i], data);
	}
}

static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable) {
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}

static void cik_enable_sdma_mgls(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable) {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	} else {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	}
}

static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}

static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	orig = data = RREG32(HDP_HOST_PATH_CNTL);

	if (enable)
		data &= ~CLOCK_GATING_DIS;
	else
		data |= CLOCK_GATING_DIS;

	if (orig != data)
		WREG32(HDP_HOST_PATH_CNTL, data);
}

static void cik_enable_hdp_ls(struct radeon_device *rdev,
			      bool enable)
{
	u32 orig, data;

	orig = data = RREG32(HDP_MEM_POWER_LS);

	if (enable)
		data |= HDP_LS_ENABLE;
	else
		data &= ~HDP_LS_ENABLE;

	if (orig != data)
		WREG32(HDP_MEM_POWER_LS, data);
}

void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
	}

	if (block & RADEON_CG_BLOCK_MC) {
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}

static void cik_init_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}

static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
					  bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
	else
		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
					  bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
	else
		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data &= ~DISABLE_CP_PG;
	else
		data |= DISABLE_CP_PG;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data &= ~DISABLE_GDS_PG;
	else
		data |= DISABLE_GDS_PG;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

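/*
 * Illustrative sketch (not part of the driver): the layout of the RLC cp
 * table buffer as written by cik_init_cp_pg_table() below, one
 * CP_ME_TABLE_SIZE dword block per micro engine (a fifth block exists on
 * KAVERI only, taken from the same MEC firmware image):
 *
 *	dword offset		contents
 *	0 * CP_ME_TABLE_SIZE	CE register table
 *	1 * CP_ME_TABLE_SIZE	PFP register table
 *	2 * CP_ME_TABLE_SIZE	ME register table
 *	3 * CP_ME_TABLE_SIZE	MEC register table
 *	4 * CP_ME_TABLE_SIZE	second MEC register table (KAVERI only)
 */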
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i++)
			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
		bo_offset += CP_ME_TABLE_SIZE;
	}
}

static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		data = RREG32(DB_RENDER_CONTROL);
	}
}

static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}

static void cik_init_ao_cu_mask(struct radeon_device *rdev)
{
	u32 i, j, k, active_cu_number = 0;
	u32 mask, counter, cu_bitmap;
	u32 tmp = 0;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
					if (counter < 2)
						cu_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}

			active_cu_number += counter;
			tmp |= (cu_bitmap << (i * 16 + j * 8));
		}
	}

	WREG32(RLC_PG_AO_CU_MASK, tmp);

	tmp = RREG32(RLC_MAX_PG_CU);
	tmp &= ~MAX_PU_CU_MASK;
	tmp |= MAX_PU_CU(active_cu_number);
	WREG32(RLC_MAX_PG_CU, tmp);
}

static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
				       bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data |= STATIC_PER_CU_PG_ENABLE;
	else
		data &= ~STATIC_PER_CU_PG_ENABLE;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
					bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data |= DYN_PER_CU_PG_ENABLE;
	else
		data &= ~DYN_PER_CU_PG_ENABLE;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);
}

static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	bool has_pg = false;
	bool has_dyn_mgpg = false;
	bool has_static_mgpg = false;

	/* only APUs have PG */
	if (rdev->flags & RADEON_IS_IGP) {
		has_pg = true;
		has_static_mgpg = true;
		if (rdev->family == CHIP_KAVERI)
			has_dyn_mgpg = true;
	}

	if (has_pg) {
		cik_enable_gfx_cgpg(rdev, enable);
		if (enable) {
			cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
			cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
		} else {
			cik_enable_gfx_static_mgpg(rdev, false);
			cik_enable_gfx_dynamic_mgpg(rdev, false);
		}
	}
}

void cik_init_pg(struct radeon_device *rdev)
{
	bool has_pg = false;

	/* only APUs have PG */
	if (rdev->flags & RADEON_IS_IGP) {
		/* XXX disable this for now */
		/* has_pg = true; */
	}

	if (has_pg) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		cik_init_gfx_cgpg(rdev);
		cik_enable_cp_pg(rdev, true);
		cik_enable_gds_pg(rdev, true);
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}

/*
 * Interrupts
 * Starting with r6xx, interrupts are handled via a ring buffer.
 * Ring buffers are areas of GPU accessible memory that the GPU
 * writes interrupt vectors into and the host reads vectors out of.
 * There is a rptr (read pointer) that determines where the
 * host is currently reading, and a wptr (write pointer)
 * which determines where the GPU has written. When the
 * pointers are equal, the ring is idle. When the GPU
 * writes vectors to the ring buffer, it increments the
 * wptr. When there is an interrupt, the host then starts
 * fetching vectors and processing them until the pointers are
 * equal again, at which point it updates the rptr.
 */

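/*
 * Illustrative sketch (not part of the driver): the host-side consume loop
 * for the IH ring in its simplest form. Each vector is 128 bits (16 bytes),
 * so the rptr advances by 16 bytes per entry and wraps via ptr_mask; the
 * real loop, with decoding and acking, is in cik_irq_process() below.
 *
 *	while (rptr != wptr) {
 *		... decode the 128-bit vector at byte offset rptr ...
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */
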
/**
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK).
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}

/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}

/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}

/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}

/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	return 0;
}

/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK). Certain interrupt sources are sw
 * generated and do not require an explicit ack.
 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}

/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}

/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}

/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}

/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK). Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing
		 * interrupts from the last vector that was not
		 * overwritten (wptr + 16). Hopefully this allows us
		 * to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}

6986/* CIK IV Ring
6987 * Each IV ring entry is 128 bits:
6988 * [7:0] - interrupt source id
6989 * [31:8] - reserved
6990 * [59:32] - interrupt source data
6991 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04006992 * [71:64] - RINGID
6993 * CP:
6994 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05006995 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6996 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6997 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6998 * PIPE_ID - ME0 0=3D
6999 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04007000 * SDMA:
7001 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7002 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7003 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05007004 * [79:72] - VMID
7005 * [95:80] - PASID
7006 * [127:96] - reserved
7007 */
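/* Consuming a vector therefore means reading 4 dwords: with a byte-based
 * rptr, ring_index = rptr / 4 indexes the dword array and rptr advances
 * by 16 per entry, as cik_irq_process() does below.
 */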
7008/**
7009 * cik_irq_process - interrupt handler
7010 *
7011 * @rdev: radeon_device pointer
7012 *
7013 * Interrupt handler (CIK). Walk the IH ring,
7014 * ack interrupts and schedule work to handle
7015 * interrupt events.
7016 * Returns irq process return code.
7017 */
7018int cik_irq_process(struct radeon_device *rdev)
7019{
Alex Deucher2b0781a2013-04-09 14:26:16 -04007020 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7021 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05007022 u32 wptr;
7023 u32 rptr;
7024 u32 src_id, src_data, ring_id;
7025 u8 me_id, pipe_id, queue_id;
7026 u32 ring_index;
7027 bool queue_hotplug = false;
7028 bool queue_reset = false;
Alex Deucher3ec7d112013-06-14 10:42:22 -04007029 u32 addr, status, mc_client;
Alex Deuchera59781b2012-11-09 10:45:57 -05007030
7031 if (!rdev->ih.enabled || rdev->shutdown)
7032 return IRQ_NONE;
7033
7034 wptr = cik_get_ih_wptr(rdev);
7035
7036restart_ih:
7037 /* is somebody else already processing irqs? */
7038 if (atomic_xchg(&rdev->ih.lock, 1))
7039 return IRQ_NONE;
7040
7041 rptr = rdev->ih.rptr;
7042 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7043
7044 /* Order reading of wptr vs. reading of IH ring data */
7045 rmb();
7046
7047 /* display interrupts */
7048 cik_irq_ack(rdev);
7049
7050 while (rptr != wptr) {
7051 /* wptr/rptr are in bytes! */
7052 ring_index = rptr / 4;
7053 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7054 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7055 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05007056
7057 switch (src_id) {
7058 case 1: /* D1 vblank/vline */
7059 switch (src_data) {
7060 case 0: /* D1 vblank */
7061 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7062 if (rdev->irq.crtc_vblank_int[0]) {
7063 drm_handle_vblank(rdev->ddev, 0);
7064 rdev->pm.vblank_sync = true;
7065 wake_up(&rdev->irq.vblank_queue);
7066 }
7067 if (atomic_read(&rdev->irq.pflip[0]))
7068 radeon_crtc_handle_flip(rdev, 0);
7069 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7070 DRM_DEBUG("IH: D1 vblank\n");
7071 }
7072 break;
7073 case 1: /* D1 vline */
7074 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7075 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7076 DRM_DEBUG("IH: D1 vline\n");
7077 }
7078 break;
7079 default:
7080 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7081 break;
7082 }
7083 break;
7084 case 2: /* D2 vblank/vline */
7085 switch (src_data) {
7086 case 0: /* D2 vblank */
7087 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7088 if (rdev->irq.crtc_vblank_int[1]) {
7089 drm_handle_vblank(rdev->ddev, 1);
7090 rdev->pm.vblank_sync = true;
7091 wake_up(&rdev->irq.vblank_queue);
7092 }
7093 if (atomic_read(&rdev->irq.pflip[1]))
7094 radeon_crtc_handle_flip(rdev, 1);
7095 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7096 DRM_DEBUG("IH: D2 vblank\n");
7097 }
7098 break;
7099 case 1: /* D2 vline */
7100 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7101 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7102 DRM_DEBUG("IH: D2 vline\n");
7103 }
7104 break;
7105 default:
7106 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7107 break;
7108 }
7109 break;
7110 case 3: /* D3 vblank/vline */
7111 switch (src_data) {
7112 case 0: /* D3 vblank */
7113 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7114 if (rdev->irq.crtc_vblank_int[2]) {
7115 drm_handle_vblank(rdev->ddev, 2);
7116 rdev->pm.vblank_sync = true;
7117 wake_up(&rdev->irq.vblank_queue);
7118 }
7119 if (atomic_read(&rdev->irq.pflip[2]))
7120 radeon_crtc_handle_flip(rdev, 2);
7121 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7122 DRM_DEBUG("IH: D3 vblank\n");
7123 }
7124 break;
7125 case 1: /* D3 vline */
7126 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7127 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7128 DRM_DEBUG("IH: D3 vline\n");
7129 }
7130 break;
7131 default:
7132 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7133 break;
7134 }
7135 break;
7136 case 4: /* D4 vblank/vline */
7137 switch (src_data) {
7138 case 0: /* D4 vblank */
7139 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7140 if (rdev->irq.crtc_vblank_int[3]) {
7141 drm_handle_vblank(rdev->ddev, 3);
7142 rdev->pm.vblank_sync = true;
7143 wake_up(&rdev->irq.vblank_queue);
7144 }
7145 if (atomic_read(&rdev->irq.pflip[3]))
7146 radeon_crtc_handle_flip(rdev, 3);
7147 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7148 DRM_DEBUG("IH: D4 vblank\n");
7149 }
7150 break;
7151 case 1: /* D4 vline */
7152 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7153 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7154 DRM_DEBUG("IH: D4 vline\n");
7155 }
7156 break;
7157 default:
7158 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7159 break;
7160 }
7161 break;
7162 case 5: /* D5 vblank/vline */
7163 switch (src_data) {
7164 case 0: /* D5 vblank */
7165 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7166 if (rdev->irq.crtc_vblank_int[4]) {
7167 drm_handle_vblank(rdev->ddev, 4);
7168 rdev->pm.vblank_sync = true;
7169 wake_up(&rdev->irq.vblank_queue);
7170 }
7171 if (atomic_read(&rdev->irq.pflip[4]))
7172 radeon_crtc_handle_flip(rdev, 4);
7173 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7174 DRM_DEBUG("IH: D5 vblank\n");
7175 }
7176 break;
7177 case 1: /* D5 vline */
7178 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7179 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7180 DRM_DEBUG("IH: D5 vline\n");
7181 }
7182 break;
7183 default:
7184 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7185 break;
7186 }
7187 break;
7188 case 6: /* D6 vblank/vline */
7189 switch (src_data) {
7190 case 0: /* D6 vblank */
7191 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7192 if (rdev->irq.crtc_vblank_int[5]) {
7193 drm_handle_vblank(rdev->ddev, 5);
7194 rdev->pm.vblank_sync = true;
7195 wake_up(&rdev->irq.vblank_queue);
7196 }
7197 if (atomic_read(&rdev->irq.pflip[5]))
7198 radeon_crtc_handle_flip(rdev, 5);
7199 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7200 DRM_DEBUG("IH: D6 vblank\n");
7201 }
7202 break;
7203 case 1: /* D6 vline */
7204 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7205 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7206 DRM_DEBUG("IH: D6 vline\n");
7207 }
7208 break;
7209 default:
7210 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7211 break;
7212 }
7213 break;
7214 case 42: /* HPD hotplug */
7215 switch (src_data) {
7216 case 0:
7217 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7218 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7219 queue_hotplug = true;
7220 DRM_DEBUG("IH: HPD1\n");
7221 }
7222 break;
7223 case 1:
7224 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7225 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7226 queue_hotplug = true;
7227 DRM_DEBUG("IH: HPD2\n");
7228 }
7229 break;
7230 case 2:
7231 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7232 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7233 queue_hotplug = true;
7234 DRM_DEBUG("IH: HPD3\n");
7235 }
7236 break;
7237 case 3:
7238 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7239 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7240 queue_hotplug = true;
7241 DRM_DEBUG("IH: HPD4\n");
7242 }
7243 break;
7244 case 4:
7245 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7246 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7247 queue_hotplug = true;
7248 DRM_DEBUG("IH: HPD5\n");
7249 }
7250 break;
7251 case 5:
7252 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7253 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7254 queue_hotplug = true;
7255 DRM_DEBUG("IH: HPD6\n");
7256 }
7257 break;
7258 default:
7259 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7260 break;
7261 }
7262 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04007263 case 146:
7264 case 147:
Alex Deucher3ec7d112013-06-14 10:42:22 -04007265 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7266 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7267 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
Alex Deucher9d97c992012-09-06 14:24:48 -04007268 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7269 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04007270 addr);
Alex Deucher9d97c992012-09-06 14:24:48 -04007271 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04007272 status);
7273 cik_vm_decode_fault(rdev, status, addr, mc_client);
Alex Deucher9d97c992012-09-06 14:24:48 -04007274 /* reset addr and status */
7275 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7276 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05007277 case 176: /* GFX RB CP_INT */
7278 case 177: /* GFX IB CP_INT */
7279 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7280 break;
7281 case 181: /* CP EOP event */
7282 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04007283 /* XXX check the bitfield order! */
7284 me_id = (ring_id & 0x60) >> 5;
7285 pipe_id = (ring_id & 0x18) >> 3;
7286 queue_id = (ring_id & 0x7) >> 0;
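			/* me 0 = gfx, me 1/2 = the two compute MECs; compute
			 * fences are matched against the ring's me/pipe below
			 */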
Alex Deuchera59781b2012-11-09 10:45:57 -05007287 switch (me_id) {
7288 case 0:
7289 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7290 break;
7291 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05007292 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04007293			if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7294 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7295			if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7296 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05007297 break;
7298 }
7299 break;
7300 case 184: /* CP Privileged reg access */
7301 DRM_ERROR("Illegal register access in command stream\n");
7302 /* XXX check the bitfield order! */
7303 me_id = (ring_id & 0x60) >> 5;
7304 pipe_id = (ring_id & 0x18) >> 3;
7305 queue_id = (ring_id & 0x7) >> 0;
7306 switch (me_id) {
7307 case 0:
7308 /* This results in a full GPU reset, but all we need to do is soft
7309 * reset the CP for gfx
7310 */
7311 queue_reset = true;
7312 break;
7313 case 1:
7314 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007315 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007316 break;
7317 case 2:
7318 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007319 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007320 break;
7321 }
7322 break;
7323 case 185: /* CP Privileged inst */
7324 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04007325 /* XXX check the bitfield order! */
7326 me_id = (ring_id & 0x60) >> 5;
7327 pipe_id = (ring_id & 0x18) >> 3;
7328 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05007329 switch (me_id) {
7330 case 0:
7331 /* This results in a full GPU reset, but all we need to do is soft
7332 * reset the CP for gfx
7333 */
7334 queue_reset = true;
7335 break;
7336 case 1:
7337 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007338 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007339 break;
7340 case 2:
7341 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007342 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007343 break;
7344 }
7345 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04007346 case 224: /* SDMA trap event */
7347 /* XXX check the bitfield order! */
7348 me_id = (ring_id & 0x3) >> 0;
7349 queue_id = (ring_id & 0xc) >> 2;
7350 DRM_DEBUG("IH: SDMA trap\n");
7351 switch (me_id) {
7352 case 0:
7353 switch (queue_id) {
7354 case 0:
7355 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7356 break;
7357 case 1:
7358 /* XXX compute */
7359 break;
7360 case 2:
7361 /* XXX compute */
7362 break;
7363 }
7364 break;
7365 case 1:
7366 switch (queue_id) {
7367 case 0:
7368 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7369 break;
7370 case 1:
7371 /* XXX compute */
7372 break;
7373 case 2:
7374 /* XXX compute */
7375 break;
7376 }
7377 break;
7378 }
7379 break;
7380 case 241: /* SDMA Privileged inst */
7381 case 247: /* SDMA Privileged inst */
7382 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7383 /* XXX check the bitfield order! */
7384 me_id = (ring_id & 0x3) >> 0;
7385 queue_id = (ring_id & 0xc) >> 2;
7386 switch (me_id) {
7387 case 0:
7388 switch (queue_id) {
7389 case 0:
7390 queue_reset = true;
7391 break;
7392 case 1:
7393 /* XXX compute */
7394 queue_reset = true;
7395 break;
7396 case 2:
7397 /* XXX compute */
7398 queue_reset = true;
7399 break;
7400 }
7401 break;
7402 case 1:
7403 switch (queue_id) {
7404 case 0:
7405 queue_reset = true;
7406 break;
7407 case 1:
7408 /* XXX compute */
7409 queue_reset = true;
7410 break;
7411 case 2:
7412 /* XXX compute */
7413 queue_reset = true;
7414 break;
7415 }
7416 break;
7417 }
7418 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05007419 case 233: /* GUI IDLE */
7420 DRM_DEBUG("IH: GUI idle\n");
7421 break;
7422 default:
7423 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7424 break;
7425 }
7426
7427 /* wptr/rptr are in bytes! */
7428 rptr += 16;
7429 rptr &= rdev->ih.ptr_mask;
7430 }
7431 if (queue_hotplug)
7432 schedule_work(&rdev->hotplug_work);
7433 if (queue_reset)
7434 schedule_work(&rdev->reset_work);
7435 rdev->ih.rptr = rptr;
7436 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7437 atomic_set(&rdev->ih.lock, 0);
7438
7439 /* make sure wptr hasn't changed while processing */
7440 wptr = cik_get_ih_wptr(rdev);
7441 if (wptr != rptr)
7442 goto restart_ih;
7443
7444 return IRQ_HANDLED;
7445}
Alex Deucher7bf94a22012-08-17 11:48:29 -04007446
7447/*
7448 * startup/shutdown callbacks
7449 */
7450/**
7451 * cik_startup - program the asic to a functional state
7452 *
7453 * @rdev: radeon_device pointer
7454 *
7455 * Programs the asic to a functional state (CIK).
7456 * Called by cik_init() and cik_resume().
7457 * Returns 0 for success, error for failure.
7458 */
7459static int cik_startup(struct radeon_device *rdev)
7460{
7461 struct radeon_ring *ring;
7462 int r;
7463
Alex Deucher8a7cd272013-08-06 11:29:39 -04007464 /* enable pcie gen2/3 link */
7465 cik_pcie_gen3_enable(rdev);
Alex Deucher7235711a42013-04-04 13:58:09 -04007466 /* enable aspm */
7467 cik_program_aspm(rdev);
Alex Deucher8a7cd272013-08-06 11:29:39 -04007468
Alex Deucher6fab3feb2013-08-04 12:13:17 -04007469 cik_mc_program(rdev);
7470
Alex Deucher7bf94a22012-08-17 11:48:29 -04007471 if (rdev->flags & RADEON_IS_IGP) {
7472 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7473 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7474 r = cik_init_microcode(rdev);
7475 if (r) {
7476 DRM_ERROR("Failed to load firmware!\n");
7477 return r;
7478 }
7479 }
7480 } else {
7481 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7482 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7483 !rdev->mc_fw) {
7484 r = cik_init_microcode(rdev);
7485 if (r) {
7486 DRM_ERROR("Failed to load firmware!\n");
7487 return r;
7488 }
7489 }
7490
7491 r = ci_mc_load_microcode(rdev);
7492 if (r) {
7493 DRM_ERROR("Failed to load MC firmware!\n");
7494 return r;
7495 }
7496 }
7497
7498 r = r600_vram_scratch_init(rdev);
7499 if (r)
7500 return r;
7501
Alex Deucher7bf94a22012-08-17 11:48:29 -04007502 r = cik_pcie_gart_enable(rdev);
7503 if (r)
7504 return r;
7505 cik_gpu_init(rdev);
7506
7507 /* allocate rlc buffers */
Alex Deucher22c775c2013-07-23 09:41:05 -04007508 if (rdev->flags & RADEON_IS_IGP) {
7509 if (rdev->family == CHIP_KAVERI) {
7510 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7511 rdev->rlc.reg_list_size =
7512 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7513 } else {
7514 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7515 rdev->rlc.reg_list_size =
7516 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7517 }
7518 }
7519 rdev->rlc.cs_data = ci_cs_data;
7520 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
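	/* cp_table_size presumably holds CP_ME_TABLE_SIZE dword entries for
	 * each of the 5 CP units (CE, PFP, ME and the two MECs)
	 */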
Alex Deucher1fd11772013-04-17 17:53:50 -04007521 r = sumo_rlc_init(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007522 if (r) {
7523 DRM_ERROR("Failed to init rlc BOs!\n");
7524 return r;
7525 }
7526
7527 /* allocate wb buffer */
7528 r = radeon_wb_init(rdev);
7529 if (r)
7530 return r;
7531
Alex Deucher963e81f2013-06-26 17:37:11 -04007532 /* allocate mec buffers */
7533 r = cik_mec_init(rdev);
7534 if (r) {
7535 DRM_ERROR("Failed to init MEC BOs!\n");
7536 return r;
7537 }
7538
Alex Deucher7bf94a22012-08-17 11:48:29 -04007539 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7540 if (r) {
7541 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7542 return r;
7543 }
7544
Alex Deucher963e81f2013-06-26 17:37:11 -04007545 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7546 if (r) {
7547 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7548 return r;
7549 }
7550
7551 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7552 if (r) {
7553 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7554 return r;
7555 }
7556
Alex Deucher7bf94a22012-08-17 11:48:29 -04007557 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7558 if (r) {
7559 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7560 return r;
7561 }
7562
7563 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7564 if (r) {
7565 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7566 return r;
7567 }
7568
Christian König87167bb2013-04-09 13:39:21 -04007569 r = cik_uvd_resume(rdev);
7570 if (!r) {
7571 r = radeon_fence_driver_start_ring(rdev,
7572 R600_RING_TYPE_UVD_INDEX);
7573 if (r)
7574 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7575 }
7576 if (r)
7577 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
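	/* leaving ring_size at 0 makes the UVD ring setup later in this
	 * function a no-op, so a UVD failure only disables UVD
	 */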
7578
Alex Deucher7bf94a22012-08-17 11:48:29 -04007579 /* Enable IRQ */
7580 if (!rdev->irq.installed) {
7581 r = radeon_irq_kms_init(rdev);
7582 if (r)
7583 return r;
7584 }
7585
7586 r = cik_irq_init(rdev);
7587 if (r) {
7588 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7589 radeon_irq_kms_fini(rdev);
7590 return r;
7591 }
7592 cik_irq_set(rdev);
7593
7594 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7595 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7596 CP_RB0_RPTR, CP_RB0_WPTR,
7597 0, 0xfffff, RADEON_CP_PACKET2);
7598 if (r)
7599 return r;
7600
Alex Deucher963e81f2013-06-26 17:37:11 -04007601 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04007602 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04007603 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7604 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7605 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04007606 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04007607 if (r)
7608 return r;
7609 ring->me = 1; /* first MEC */
7610 ring->pipe = 0; /* first pipe */
7611 ring->queue = 0; /* first queue */
7612 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7613
Alex Deucher2615b532013-06-03 11:21:58 -04007614 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04007615 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7616 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7617 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04007618 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04007619 if (r)
7620 return r;
7621	/* dGPUs only have 1 MEC */
7622 ring->me = 1; /* first MEC */
7623 ring->pipe = 0; /* first pipe */
7624 ring->queue = 1; /* second queue */
7625 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7626
Alex Deucher7bf94a22012-08-17 11:48:29 -04007627 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7628 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7629 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7630 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7631 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7632 if (r)
7633 return r;
7634
7635 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7636 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7637 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7638 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7639 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7640 if (r)
7641 return r;
7642
7643 r = cik_cp_resume(rdev);
7644 if (r)
7645 return r;
7646
7647 r = cik_sdma_resume(rdev);
7648 if (r)
7649 return r;
7650
Christian König87167bb2013-04-09 13:39:21 -04007651 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7652 if (ring->ring_size) {
7653 r = radeon_ring_init(rdev, ring, ring->ring_size,
7654 R600_WB_UVD_RPTR_OFFSET,
7655 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7656 0, 0xfffff, RADEON_CP_PACKET2);
7657 if (!r)
7658 r = r600_uvd_init(rdev);
7659 if (r)
7660 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7661 }
7662
Alex Deucher7bf94a22012-08-17 11:48:29 -04007663 r = radeon_ib_pool_init(rdev);
7664 if (r) {
7665 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7666 return r;
7667 }
7668
7669 r = radeon_vm_manager_init(rdev);
7670 if (r) {
7671 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7672 return r;
7673 }
7674
7675 return 0;
7676}
7677
7678/**
7679 * cik_resume - resume the asic to a functional state
7680 *
7681 * @rdev: radeon_device pointer
7682 *
7683 * Programs the asic to a functional state (CIK).
7684 * Called at resume.
7685 * Returns 0 for success, error for failure.
7686 */
7687int cik_resume(struct radeon_device *rdev)
7688{
7689 int r;
7690
7691 /* post card */
7692 atom_asic_init(rdev->mode_info.atom_context);
7693
Alex Deucher0aafd312013-04-09 14:43:30 -04007694 /* init golden registers */
7695 cik_init_golden_registers(rdev);
7696
Alex Deucher7bf94a22012-08-17 11:48:29 -04007697 rdev->accel_working = true;
7698 r = cik_startup(rdev);
7699 if (r) {
7700 DRM_ERROR("cik startup failed on resume\n");
7701 rdev->accel_working = false;
7702 return r;
7703 }
7704
7705 return r;
7706
7707}
7708
7709/**
7710 * cik_suspend - suspend the asic
7711 *
7712 * @rdev: radeon_device pointer
7713 *
7714 * Bring the chip into a state suitable for suspend (CIK).
7715 * Called at suspend.
7716 * Returns 0 for success.
7717 */
7718int cik_suspend(struct radeon_device *rdev)
7719{
7720 radeon_vm_manager_fini(rdev);
7721 cik_cp_enable(rdev, false);
7722 cik_sdma_enable(rdev, false);
Christian König2858c002013-08-01 17:34:07 +02007723 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007724 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007725 cik_irq_suspend(rdev);
7726 radeon_wb_disable(rdev);
7727 cik_pcie_gart_disable(rdev);
7728 return 0;
7729}
7730
7731/* The plan is to move initialization into this function and use
7732 * helper functions so that radeon_device_init does little more
7733 * than call asic specific functions. This should also
7734 * allow us to remove a bunch of callbacks
7735 * like vram_info.
7736 */
7737/**
7738 * cik_init - asic specific driver and hw init
7739 *
7740 * @rdev: radeon_device pointer
7741 *
7742 * Setup asic specific driver variables and program the hw
7743 * to a functional state (CIK).
7744 * Called at driver startup.
7745 * Returns 0 for success, errors for failure.
7746 */
7747int cik_init(struct radeon_device *rdev)
7748{
7749 struct radeon_ring *ring;
7750 int r;
7751
7752 /* Read BIOS */
7753 if (!radeon_get_bios(rdev)) {
7754 if (ASIC_IS_AVIVO(rdev))
7755 return -EINVAL;
7756 }
7757 /* Must be an ATOMBIOS */
7758 if (!rdev->is_atom_bios) {
7759		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7760 return -EINVAL;
7761 }
7762 r = radeon_atombios_init(rdev);
7763 if (r)
7764 return r;
7765
7766 /* Post card if necessary */
7767 if (!radeon_card_posted(rdev)) {
7768 if (!rdev->bios) {
7769 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7770 return -EINVAL;
7771 }
7772 DRM_INFO("GPU not posted. posting now...\n");
7773 atom_asic_init(rdev->mode_info.atom_context);
7774 }
Alex Deucher0aafd312013-04-09 14:43:30 -04007775 /* init golden registers */
7776 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007777 /* Initialize scratch registers */
7778 cik_scratch_init(rdev);
7779 /* Initialize surface registers */
7780 radeon_surface_init(rdev);
7781 /* Initialize clocks */
7782 radeon_get_clock_info(rdev->ddev);
7783
7784 /* Fence driver */
7785 r = radeon_fence_driver_init(rdev);
7786 if (r)
7787 return r;
7788
7789 /* initialize memory controller */
7790 r = cik_mc_init(rdev);
7791 if (r)
7792 return r;
7793 /* Memory manager */
7794 r = radeon_bo_init(rdev);
7795 if (r)
7796 return r;
7797
7798 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7799 ring->ring_obj = NULL;
7800 r600_ring_init(rdev, ring, 1024 * 1024);
7801
Alex Deucher963e81f2013-06-26 17:37:11 -04007802 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7803 ring->ring_obj = NULL;
7804 r600_ring_init(rdev, ring, 1024 * 1024);
7805 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7806 if (r)
7807 return r;
7808
7809 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7810 ring->ring_obj = NULL;
7811 r600_ring_init(rdev, ring, 1024 * 1024);
7812 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7813 if (r)
7814 return r;
7815
Alex Deucher7bf94a22012-08-17 11:48:29 -04007816 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7817 ring->ring_obj = NULL;
7818 r600_ring_init(rdev, ring, 256 * 1024);
7819
7820 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7821 ring->ring_obj = NULL;
7822 r600_ring_init(rdev, ring, 256 * 1024);
7823
Christian König87167bb2013-04-09 13:39:21 -04007824 r = radeon_uvd_init(rdev);
7825 if (!r) {
7826 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7827 ring->ring_obj = NULL;
7828 r600_ring_init(rdev, ring, 4096);
7829 }
7830
Alex Deucher7bf94a22012-08-17 11:48:29 -04007831 rdev->ih.ring_obj = NULL;
7832 r600_ih_ring_init(rdev, 64 * 1024);
7833
7834 r = r600_pcie_gart_init(rdev);
7835 if (r)
7836 return r;
7837
7838 rdev->accel_working = true;
7839 r = cik_startup(rdev);
7840 if (r) {
7841 dev_err(rdev->dev, "disabling GPU acceleration\n");
7842 cik_cp_fini(rdev);
7843 cik_sdma_fini(rdev);
7844 cik_irq_fini(rdev);
Alex Deucher1fd11772013-04-17 17:53:50 -04007845 sumo_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04007846 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007847 radeon_wb_fini(rdev);
7848 radeon_ib_pool_fini(rdev);
7849 radeon_vm_manager_fini(rdev);
7850 radeon_irq_kms_fini(rdev);
7851 cik_pcie_gart_fini(rdev);
7852 rdev->accel_working = false;
7853 }
7854
7855 /* Don't start up if the MC ucode is missing.
7856 * The default clocks and voltages before the MC ucode
7857	 * is loaded are not sufficient for advanced operations.
7858 */
7859 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7860 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7861 return -EINVAL;
7862 }
7863
7864 return 0;
7865}
7866
7867/**
7868 * cik_fini - asic specific driver and hw fini
7869 *
7870 * @rdev: radeon_device pointer
7871 *
7872 * Tear down the asic specific driver variables and program the hw
7873 * to an idle state (CIK).
7874 * Called at driver unload.
7875 */
7876void cik_fini(struct radeon_device *rdev)
7877{
7878 cik_cp_fini(rdev);
7879 cik_sdma_fini(rdev);
7880 cik_irq_fini(rdev);
Alex Deucher1fd11772013-04-17 17:53:50 -04007881 sumo_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04007882 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007883 radeon_wb_fini(rdev);
7884 radeon_vm_manager_fini(rdev);
7885 radeon_ib_pool_fini(rdev);
7886 radeon_irq_kms_fini(rdev);
Christian König2858c002013-08-01 17:34:07 +02007887 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007888 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007889 cik_pcie_gart_fini(rdev);
7890 r600_vram_scratch_fini(rdev);
7891 radeon_gem_fini(rdev);
7892 radeon_fence_driver_fini(rdev);
7893 radeon_bo_fini(rdev);
7894 radeon_atombios_fini(rdev);
7895 kfree(rdev->bios);
7896 rdev->bios = NULL;
7897}
Alex Deuchercd84a272012-07-20 17:13:13 -04007898
7899/* display watermark setup */
7900/**
7901 * dce8_line_buffer_adjust - Set up the line buffer
7902 *
7903 * @rdev: radeon_device pointer
7904 * @radeon_crtc: the selected display controller
7905 * @mode: the current display mode on the selected display
7906 * controller
7907 *
7908 * Setup up the line buffer allocation for
7909 * the selected display controller (CIK).
7910 * Returns the line buffer size in pixels.
7911 */
7912static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7913 struct radeon_crtc *radeon_crtc,
7914 struct drm_display_mode *mode)
7915{
7916 u32 tmp;
7917
7918 /*
7919 * Line Buffer Setup
7920	 * There are 6 line buffers, one for each display controller.
7921	 * There are 3 partitions per LB. Select the number of partitions
7922	 * to enable based on the display width. For display widths larger
7923	 * than 4096, you need to use 2 display controllers and combine
7924 * them using the stereo blender.
7925 */
7926 if (radeon_crtc->base.enabled && mode) {
7927 if (mode->crtc_hdisplay < 1920)
7928 tmp = 1;
7929 else if (mode->crtc_hdisplay < 2560)
7930 tmp = 2;
7931 else if (mode->crtc_hdisplay < 4096)
7932 tmp = 0;
7933 else {
7934 DRM_DEBUG_KMS("Mode too big for LB!\n");
7935 tmp = 0;
7936 }
7937 } else
7938 tmp = 1;
7939
7940 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7941 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7942
7943 if (radeon_crtc->base.enabled && mode) {
7944 switch (tmp) {
7945 case 0:
7946 default:
7947 return 4096 * 2;
7948 case 1:
7949 return 1920 * 2;
7950 case 2:
7951 return 2560 * 2;
7952 }
7953 }
7954
7955 /* controller not enabled, so no lb used */
7956 return 0;
7957}
7958
7959/**
7960 * cik_get_number_of_dram_channels - get the number of dram channels
7961 *
7962 * @rdev: radeon_device pointer
7963 *
7964 * Look up the number of video ram channels (CIK).
7965 * Used for display watermark bandwidth calculations
7966 * Returns the number of dram channels
7967 */
7968static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7969{
7970 u32 tmp = RREG32(MC_SHARED_CHMAP);
7971
7972 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7973 case 0:
7974 default:
7975 return 1;
7976 case 1:
7977 return 2;
7978 case 2:
7979 return 4;
7980 case 3:
7981 return 8;
7982 case 4:
7983 return 3;
7984 case 5:
7985 return 6;
7986 case 6:
7987 return 10;
7988 case 7:
7989 return 12;
7990 case 8:
7991 return 16;
7992 }
7993}
7994
7995struct dce8_wm_params {
7996 u32 dram_channels; /* number of dram channels */
7997 u32 yclk; /* bandwidth per dram data pin in kHz */
7998 u32 sclk; /* engine clock in kHz */
7999 u32 disp_clk; /* display clock in kHz */
8000 u32 src_width; /* viewport width */
8001 u32 active_time; /* active display time in ns */
8002 u32 blank_time; /* blank time in ns */
8003 bool interlaced; /* mode is interlaced */
8004 fixed20_12 vsc; /* vertical scale ratio */
8005 u32 num_heads; /* number of active crtcs */
8006 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8007 u32 lb_size; /* line buffer allocated to pipe */
8008 u32 vtaps; /* vertical scaler taps */
8009};
8010
8011/**
8012 * dce8_dram_bandwidth - get the dram bandwidth
8013 *
8014 * @wm: watermark calculation data
8015 *
8016 * Calculate the raw dram bandwidth (CIK).
8017 * Used for display watermark bandwidth calculations
8018 * Returns the dram bandwidth in MBytes/s
8019 */
8020static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8021{
8022 /* Calculate raw DRAM Bandwidth */
8023 fixed20_12 dram_efficiency; /* 0.7 */
8024 fixed20_12 yclk, dram_channels, bandwidth;
8025 fixed20_12 a;
8026
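	/* effective DRAM bandwidth in MB/s:
	 * (yclk in kHz / 1000) MHz * (dram_channels * 4 bytes) * 0.7 efficiency
	 */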
8027 a.full = dfixed_const(1000);
8028 yclk.full = dfixed_const(wm->yclk);
8029 yclk.full = dfixed_div(yclk, a);
8030 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8031 a.full = dfixed_const(10);
8032 dram_efficiency.full = dfixed_const(7);
8033 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8034 bandwidth.full = dfixed_mul(dram_channels, yclk);
8035 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8036
8037 return dfixed_trunc(bandwidth);
8038}
8039
8040/**
8041 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8042 *
8043 * @wm: watermark calculation data
8044 *
8045 * Calculate the dram bandwidth used for display (CIK).
8046 * Used for display watermark bandwidth calculations
8047 * Returns the dram bandwidth for display in MBytes/s
8048 */
8049static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8050{
8051 /* Calculate DRAM Bandwidth and the part allocated to display. */
8052 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8053 fixed20_12 yclk, dram_channels, bandwidth;
8054 fixed20_12 a;
8055
8056 a.full = dfixed_const(1000);
8057 yclk.full = dfixed_const(wm->yclk);
8058 yclk.full = dfixed_div(yclk, a);
8059 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8060 a.full = dfixed_const(10);
8061	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8062 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8063 bandwidth.full = dfixed_mul(dram_channels, yclk);
8064 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8065
8066 return dfixed_trunc(bandwidth);
8067}
8068
8069/**
8070 * dce8_data_return_bandwidth - get the data return bandwidth
8071 *
8072 * @wm: watermark calculation data
8073 *
8074 * Calculate the data return bandwidth used for display (CIK).
8075 * Used for display watermark bandwidth calculations
8076 * Returns the data return bandwidth in MBytes/s
8077 */
8078static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8079{
8080 /* Calculate the display Data return Bandwidth */
8081 fixed20_12 return_efficiency; /* 0.8 */
8082 fixed20_12 sclk, bandwidth;
8083 fixed20_12 a;
8084
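	/* data return bandwidth in MB/s:
	 * (sclk in kHz / 1000) MHz * 32 bytes * 0.8 efficiency
	 */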
8085 a.full = dfixed_const(1000);
8086 sclk.full = dfixed_const(wm->sclk);
8087 sclk.full = dfixed_div(sclk, a);
8088 a.full = dfixed_const(10);
8089 return_efficiency.full = dfixed_const(8);
8090 return_efficiency.full = dfixed_div(return_efficiency, a);
8091 a.full = dfixed_const(32);
8092 bandwidth.full = dfixed_mul(a, sclk);
8093 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8094
8095 return dfixed_trunc(bandwidth);
8096}
8097
8098/**
8099 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8100 *
8101 * @wm: watermark calculation data
8102 *
8103 * Calculate the dmif bandwidth used for display (CIK).
8104 * Used for display watermark bandwidth calculations
8105 * Returns the dmif bandwidth in MBytes/s
8106 */
8107static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8108{
8109 /* Calculate the DMIF Request Bandwidth */
8110 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8111 fixed20_12 disp_clk, bandwidth;
8112 fixed20_12 a, b;
8113
8114 a.full = dfixed_const(1000);
8115 disp_clk.full = dfixed_const(wm->disp_clk);
8116 disp_clk.full = dfixed_div(disp_clk, a);
8117 a.full = dfixed_const(32);
8118 b.full = dfixed_mul(a, disp_clk);
8119
8120 a.full = dfixed_const(10);
8121 disp_clk_request_efficiency.full = dfixed_const(8);
8122 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8123
8124 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8125
8126 return dfixed_trunc(bandwidth);
8127}
8128
8129/**
8130 * dce8_available_bandwidth - get the min available bandwidth
8131 *
8132 * @wm: watermark calculation data
8133 *
8134 * Calculate the min available bandwidth used for display (CIK).
8135 * Used for display watermark bandwidth calculations
8136 * Returns the min available bandwidth in MBytes/s
8137 */
8138static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8139{
8140	/* Calculate the available bandwidth. Display can use this temporarily but not on average. */
8141 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8142 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8143 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8144
8145 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8146}
8147
8148/**
8149 * dce8_average_bandwidth - get the average available bandwidth
8150 *
8151 * @wm: watermark calculation data
8152 *
8153 * Calculate the average available bandwidth used for display (CIK).
8154 * Used for display watermark bandwidth calculations
8155 * Returns the average available bandwidth in MBytes/s
8156 */
8157static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8158{
8159	/* Calculate the display mode average bandwidth.
8160 * DisplayMode should contain the source and destination dimensions,
8161 * timing, etc.
8162 */
8163 fixed20_12 bpp;
8164 fixed20_12 line_time;
8165 fixed20_12 src_width;
8166 fixed20_12 bandwidth;
8167 fixed20_12 a;
8168
8169 a.full = dfixed_const(1000);
8170 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8171 line_time.full = dfixed_div(line_time, a);
8172 bpp.full = dfixed_const(wm->bytes_per_pixel);
8173 src_width.full = dfixed_const(wm->src_width);
8174 bandwidth.full = dfixed_mul(src_width, bpp);
8175 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8176 bandwidth.full = dfixed_div(bandwidth, line_time);
8177
8178 return dfixed_trunc(bandwidth);
8179}
8180
8181/**
8182 * dce8_latency_watermark - get the latency watermark
8183 *
8184 * @wm: watermark calculation data
8185 *
8186 * Calculate the latency watermark (CIK).
8187 * Used for display watermark bandwidth calculations
8188 * Returns the latency watermark in ns
8189 */
8190static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8191{
8192 /* First calculate the latency in ns */
8193 u32 mc_latency = 2000; /* 2000 ns. */
8194 u32 available_bandwidth = dce8_available_bandwidth(wm);
8195 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8196 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8197 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8198 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8199 (wm->num_heads * cursor_line_pair_return_time);
8200 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8201 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8202 u32 tmp, dmif_size = 12288;
8203 fixed20_12 a, b, c;
8204
8205 if (wm->num_heads == 0)
8206 return 0;
8207
8208 a.full = dfixed_const(2);
8209 b.full = dfixed_const(1);
8210 if ((wm->vsc.full > a.full) ||
8211 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8212 (wm->vtaps >= 5) ||
8213 ((wm->vsc.full >= a.full) && wm->interlaced))
8214 max_src_lines_per_dst_line = 4;
8215 else
8216 max_src_lines_per_dst_line = 2;
8217
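	/* lb_fill_bw below is the line buffer fill rate: the minimum of the
	 * per-head share of the available bandwidth, the dmif-size-limited
	 * rate, and dispclk * bytes_per_pixel
	 */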
8218 a.full = dfixed_const(available_bandwidth);
8219 b.full = dfixed_const(wm->num_heads);
8220 a.full = dfixed_div(a, b);
8221
8222 b.full = dfixed_const(mc_latency + 512);
8223 c.full = dfixed_const(wm->disp_clk);
8224 b.full = dfixed_div(b, c);
8225
8226 c.full = dfixed_const(dmif_size);
8227 b.full = dfixed_div(c, b);
8228
8229 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8230
8231 b.full = dfixed_const(1000);
8232 c.full = dfixed_const(wm->disp_clk);
8233 b.full = dfixed_div(c, b);
8234 c.full = dfixed_const(wm->bytes_per_pixel);
8235 b.full = dfixed_mul(b, c);
8236
8237 lb_fill_bw = min(tmp, dfixed_trunc(b));
8238
8239 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8240 b.full = dfixed_const(1000);
8241 c.full = dfixed_const(lb_fill_bw);
8242 b.full = dfixed_div(c, b);
8243 a.full = dfixed_div(a, b);
8244 line_fill_time = dfixed_trunc(a);
8245
8246 if (line_fill_time < wm->active_time)
8247 return latency;
8248 else
8249 return latency + (line_fill_time - wm->active_time);
8250
8251}
8252
8253/**
8254 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8255 * average and available dram bandwidth
8256 *
8257 * @wm: watermark calculation data
8258 *
8259 * Check if the display average bandwidth fits in the display
8260 * dram bandwidth (CIK).
8261 * Used for display watermark bandwidth calculations
8262 * Returns true if the display fits, false if not.
8263 */
8264static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8265{
8266 if (dce8_average_bandwidth(wm) <=
8267 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8268 return true;
8269 else
8270 return false;
8271}
8272
8273/**
8274 * dce8_average_bandwidth_vs_available_bandwidth - check
8275 * average and available bandwidth
8276 *
8277 * @wm: watermark calculation data
8278 *
8279 * Check if the display average bandwidth fits in the display
8280 * available bandwidth (CIK).
8281 * Used for display watermark bandwidth calculations
8282 * Returns true if the display fits, false if not.
8283 */
8284static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8285{
8286 if (dce8_average_bandwidth(wm) <=
8287 (dce8_available_bandwidth(wm) / wm->num_heads))
8288 return true;
8289 else
8290 return false;
8291}
8292
8293/**
8294 * dce8_check_latency_hiding - check latency hiding
8295 *
8296 * @wm: watermark calculation data
8297 *
8298 * Check latency hiding (CIK).
8299 * Used for display watermark bandwidth calculations
8300 * Returns true if the display fits, false if not.
8301 */
8302static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8303{
8304 u32 lb_partitions = wm->lb_size / wm->src_width;
8305 u32 line_time = wm->active_time + wm->blank_time;
8306 u32 latency_tolerant_lines;
8307 u32 latency_hiding;
8308 fixed20_12 a;
8309
8310 a.full = dfixed_const(1);
8311 if (wm->vsc.full > a.full)
8312 latency_tolerant_lines = 1;
8313 else {
8314 if (lb_partitions <= (wm->vtaps + 1))
8315 latency_tolerant_lines = 1;
8316 else
8317 latency_tolerant_lines = 2;
8318 }
8319
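	/* worst-case time the display can ride out without new data:
	 * the latency-tolerant lines plus the blanking interval
	 */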
8320 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8321
8322 if (dce8_latency_watermark(wm) <= latency_hiding)
8323 return true;
8324 else
8325 return false;
8326}
8327
8328/**
8329 * dce8_program_watermarks - program display watermarks
8330 *
8331 * @rdev: radeon_device pointer
8332 * @radeon_crtc: the selected display controller
8333 * @lb_size: line buffer size
8334 * @num_heads: number of display controllers in use
8335 *
8336 * Calculate and program the display watermarks for the
8337 * selected display controller (CIK).
8338 */
8339static void dce8_program_watermarks(struct radeon_device *rdev,
8340 struct radeon_crtc *radeon_crtc,
8341 u32 lb_size, u32 num_heads)
8342{
8343 struct drm_display_mode *mode = &radeon_crtc->base.mode;
Alex Deucher58ea2de2013-01-24 10:03:39 -05008344 struct dce8_wm_params wm_low, wm_high;
Alex Deuchercd84a272012-07-20 17:13:13 -04008345 u32 pixel_period;
8346 u32 line_time = 0;
8347 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8348 u32 tmp, wm_mask;
8349
8350 if (radeon_crtc->base.enabled && num_heads && mode) {
8351 pixel_period = 1000000 / (u32)mode->clock;
8352 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8353
Alex Deucher58ea2de2013-01-24 10:03:39 -05008354 /* watermark for high clocks */
8355 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8356 rdev->pm.dpm_enabled) {
8357 wm_high.yclk =
8358 radeon_dpm_get_mclk(rdev, false) * 10;
8359 wm_high.sclk =
8360 radeon_dpm_get_sclk(rdev, false) * 10;
8361 } else {
8362 wm_high.yclk = rdev->pm.current_mclk * 10;
8363 wm_high.sclk = rdev->pm.current_sclk * 10;
8364 }
8365
8366 wm_high.disp_clk = mode->clock;
8367 wm_high.src_width = mode->crtc_hdisplay;
8368 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8369 wm_high.blank_time = line_time - wm_high.active_time;
8370 wm_high.interlaced = false;
Alex Deuchercd84a272012-07-20 17:13:13 -04008371 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
Alex Deucher58ea2de2013-01-24 10:03:39 -05008372 wm_high.interlaced = true;
8373 wm_high.vsc = radeon_crtc->vsc;
8374 wm_high.vtaps = 1;
Alex Deuchercd84a272012-07-20 17:13:13 -04008375 if (radeon_crtc->rmx_type != RMX_OFF)
Alex Deucher58ea2de2013-01-24 10:03:39 -05008376 wm_high.vtaps = 2;
8377 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8378 wm_high.lb_size = lb_size;
8379 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8380 wm_high.num_heads = num_heads;
Alex Deuchercd84a272012-07-20 17:13:13 -04008381
8382 /* set for high clocks */
Alex Deucher58ea2de2013-01-24 10:03:39 -05008383 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
Alex Deuchercd84a272012-07-20 17:13:13 -04008384
8385 /* possibly force display priority to high */
8386 /* should really do this at mode validation time... */
Alex Deucher58ea2de2013-01-24 10:03:39 -05008387 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8388 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8389 !dce8_check_latency_hiding(&wm_high) ||
8390 (rdev->disp_priority == 2)) {
8391 DRM_DEBUG_KMS("force priority to high\n");
8392 }
8393
8394 /* watermark for low clocks */
8395 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8396 rdev->pm.dpm_enabled) {
8397 wm_low.yclk =
8398 radeon_dpm_get_mclk(rdev, true) * 10;
8399 wm_low.sclk =
8400 radeon_dpm_get_sclk(rdev, true) * 10;
8401 } else {
8402 wm_low.yclk = rdev->pm.current_mclk * 10;
8403 wm_low.sclk = rdev->pm.current_sclk * 10;
8404 }
8405
8406 wm_low.disp_clk = mode->clock;
8407 wm_low.src_width = mode->crtc_hdisplay;
8408 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8409 wm_low.blank_time = line_time - wm_low.active_time;
8410 wm_low.interlaced = false;
8411 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8412 wm_low.interlaced = true;
8413 wm_low.vsc = radeon_crtc->vsc;
8414 wm_low.vtaps = 1;
8415 if (radeon_crtc->rmx_type != RMX_OFF)
8416 wm_low.vtaps = 2;
8417 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8418 wm_low.lb_size = lb_size;
8419 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8420 wm_low.num_heads = num_heads;
8421
8422 /* set for low clocks */
8423 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8424
8425 /* possibly force display priority to high */
8426 /* should really do this at mode validation time... */
8427 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8428 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8429 !dce8_check_latency_hiding(&wm_low) ||
Alex Deuchercd84a272012-07-20 17:13:13 -04008430 (rdev->disp_priority == 2)) {
8431 DRM_DEBUG_KMS("force priority to high\n");
8432 }
8433 }
8434
8435 /* select wm A */
8436 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8437 tmp = wm_mask;
8438 tmp &= ~LATENCY_WATERMARK_MASK(3);
8439 tmp |= LATENCY_WATERMARK_MASK(1);
8440 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8441 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8442 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8443 LATENCY_HIGH_WATERMARK(line_time)));
8444 /* select wm B */
8445 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8446 tmp &= ~LATENCY_WATERMARK_MASK(3);
8447 tmp |= LATENCY_WATERMARK_MASK(2);
8448 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8449 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8450 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8451 LATENCY_HIGH_WATERMARK(line_time)));
8452 /* restore original selection */
8453 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
Alex Deucher58ea2de2013-01-24 10:03:39 -05008454
8455 /* save values for DPM */
8456 radeon_crtc->line_time = line_time;
8457 radeon_crtc->wm_high = latency_watermark_a;
8458 radeon_crtc->wm_low = latency_watermark_b;
Alex Deuchercd84a272012-07-20 17:13:13 -04008459}
8460
8461/**
8462 * dce8_bandwidth_update - program display watermarks
8463 *
8464 * @rdev: radeon_device pointer
8465 *
8466 * Calculate and program the display watermarks and line
8467 * buffer allocation (CIK).
8468 */
8469void dce8_bandwidth_update(struct radeon_device *rdev)
8470{
8471 struct drm_display_mode *mode = NULL;
8472 u32 num_heads = 0, lb_size;
8473 int i;
8474
8475 radeon_update_display_priority(rdev);
8476
8477 for (i = 0; i < rdev->num_crtc; i++) {
8478 if (rdev->mode_info.crtcs[i]->base.enabled)
8479 num_heads++;
8480 }
8481 for (i = 0; i < rdev->num_crtc; i++) {
8482 mode = &rdev->mode_info.crtcs[i]->base.mode;
8483 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8484 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8485 }
8486}
Alex Deucher44fa3462012-12-18 22:17:00 -05008487
8488/**
8489 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8490 *
8491 * @rdev: radeon_device pointer
8492 *
8493 * Fetches a GPU clock counter snapshot (CIK).
8494 * Returns the 64 bit clock counter snapshot.
8495 */
8496uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8497{
8498 uint64_t clock;
8499
8500 mutex_lock(&rdev->gpu_clock_mutex);
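	/* writing the capture bit is assumed to latch the free-running
	 * counter so the LSB/MSB reads below are coherent
	 */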
8501 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8502 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8503 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8504 mutex_unlock(&rdev->gpu_clock_mutex);
8505 return clock;
8506}
8507
Christian König87167bb2013-04-09 13:39:21 -04008508static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8509 u32 cntl_reg, u32 status_reg)
8510{
8511 int r, i;
8512 struct atom_clock_dividers dividers;
8513 uint32_t tmp;
8514
8515 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8516 clock, false, &dividers);
8517 if (r)
8518 return r;
8519
8520 tmp = RREG32_SMC(cntl_reg);
8521 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8522 tmp |= dividers.post_divider;
8523 WREG32_SMC(cntl_reg, tmp);
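	/* wait for the new divider to take effect; DCLK_STATUS is assumed
	 * to signal a stable clock (polled for up to ~1 second)
	 */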
8524
8525 for (i = 0; i < 100; i++) {
8526 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8527 break;
8528 mdelay(10);
8529 }
8530 if (i == 100)
8531 return -ETIMEDOUT;
8532
8533 return 0;
8534}
8535
8536int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8537{
8538 int r = 0;
8539
8540 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8541 if (r)
8542 return r;
8543
8544 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8545 return r;
8546}
8547
8548int cik_uvd_resume(struct radeon_device *rdev)
8549{
8550 uint64_t addr;
8551 uint32_t size;
8552 int r;
8553
8554 r = radeon_uvd_resume(rdev);
8555 if (r)
8556 return r;
8557
8558	/* program the VCPU memory controller bits 0-27 */
8559 addr = rdev->uvd.gpu_addr >> 3;
Christian König4ad9c1c2013-08-05 14:10:55 +02008560 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
Christian König87167bb2013-04-09 13:39:21 -04008561 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
8562 WREG32(UVD_VCPU_CACHE_SIZE0, size);
8563
8564 addr += size;
8565 size = RADEON_UVD_STACK_SIZE >> 3;
8566 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
8567 WREG32(UVD_VCPU_CACHE_SIZE1, size);
8568
8569 addr += size;
8570 size = RADEON_UVD_HEAP_SIZE >> 3;
8571 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
8572 WREG32(UVD_VCPU_CACHE_SIZE2, size);
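	/* the VCPU sees firmware, stack and heap laid out back to back;
	 * offsets and sizes are in 8-byte units, hence the >> 3 shifts
	 */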
8573
8574 /* bits 28-31 */
8575 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
8576 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
8577
8578 /* bits 32-39 */
8579 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
8580 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
8581
8582 return 0;
8583}
Alex Deucher8a7cd272013-08-06 11:29:39 -04008584
8585static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8586{
8587 struct pci_dev *root = rdev->pdev->bus->self;
8588 int bridge_pos, gpu_pos;
8589 u32 speed_cntl, mask, current_data_rate;
8590 int ret, i;
8591 u16 tmp16;
8592
8593 if (radeon_pcie_gen2 == 0)
8594 return;
8595
8596 if (rdev->flags & RADEON_IS_IGP)
8597 return;
8598
8599 if (!(rdev->flags & RADEON_IS_PCIE))
8600 return;
8601
8602 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8603 if (ret != 0)
8604 return;
8605
8606 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8607 return;
8608
8609 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8610 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8611 LC_CURRENT_DATA_RATE_SHIFT;
8612 if (mask & DRM_PCIE_SPEED_80) {
8613 if (current_data_rate == 2) {
8614 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8615 return;
8616 }
8617 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8618 } else if (mask & DRM_PCIE_SPEED_50) {
8619 if (current_data_rate == 1) {
8620 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8621 return;
8622 }
8623 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8624 }
8625
8626 bridge_pos = pci_pcie_cap(root);
8627 if (!bridge_pos)
8628 return;
8629
8630 gpu_pos = pci_pcie_cap(rdev->pdev);
8631 if (!gpu_pos)
8632 return;
8633
8634 if (mask & DRM_PCIE_SPEED_80) {
8635 /* re-try equalization if gen3 is not already enabled */
8636 if (current_data_rate != 2) {
8637 u16 bridge_cfg, gpu_cfg;
8638 u16 bridge_cfg2, gpu_cfg2;
8639 u32 max_lw, current_lw, tmp;
8640
8641 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8642 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8643
8644 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8645 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8646
8647 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8648 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8649
8650 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8651 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8652 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8653
8654 if (current_lw < max_lw) {
8655 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8656 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8657 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8658 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8659 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8660 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8661 }
8662 }
8663
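			/* the loop below is assumed to retry link equalization
			 * up to 10 times: snapshot the link control state,
			 * force quiesce/redo-EQ, then restore the saved state
			 */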
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

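	/* Program the target link speed field (bits 3:0 of LNKCTL2) on the
	 * GPU: 1 = 2.5GT/s, 2 = 5.0GT/s, 3 = 8.0GT/s.
	 */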
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

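	/* Kick off the speed change and wait for the hardware to clear the
	 * initiate bit, which indicates the request has been accepted.
	 */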
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}

static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

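	/* Override the advertised N_FTS count (the number of fast training
	 * sequences the other end needs when exiting L0s); 0x24 appears to
	 * be the value preferred for these asics.
	 */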
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

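	/* Set up the L0s/L1 inactivity timers.  Entry to L1 via PMI stays
	 * disabled unless L1 is actually enabled below.
	 */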
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

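			/* Allow the phy plls to power down in L1; PB0/PB1 are
			 * presumably the pif blocks for the two 8-lane halves
			 * of a x16 link.
			 */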
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

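			/* CLKREQ# based clock gating is only usable if the
			 * upstream bridge advertises clock power management
			 * (PCI_EXP_LNKCAP_CLKPM) in its link capabilities.
			 */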
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

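			/* With CLKREQ# available, allow powerdown in L1/L23 and
			 * route the thermal monitor, deep sleep and bypass
			 * clocks off the PCIe reference clock, presumably so
			 * the refclk can be gated while the link sleeps.
			 */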
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

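	/* Enable light sleep for the PCIe slave, master and replay memories. */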
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

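	/* If the transmitted N_FTS value reads back saturated and both lane
	 * directions are reversed, disable the L0s inactivity timer;
	 * presumably L0s exit timing is unreliable in that configuration.
	 */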
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}