/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

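/*
 * The MODULE_FIRMWARE() entries below only record firmware file names in
 * the module metadata (visible via modinfo) so packaging and initramfs
 * tools know which blobs this driver may request at runtime; the actual
 * loading happens later via request_firmware() in cik_init_microcode().
 */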
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
void cik_uvd_resume(struct radeon_device *rdev);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
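/*
 * Worked example (illustrative only): the CTF field is nine bits of
 * temperature plus an overflow bit. A raw field value of 0x060 decodes
 * to 0x60 = 96 degrees C, returned as 96000 millidegrees; any value
 * with bit 0x200 set is clamped to the 255 C maximum.
 */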

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
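/*
 * Worked example (illustrative only): a raw SMC reading of 0x260 (608)
 * yields (608 / 8) - 49 = 27 degrees C, i.e. 27000 millidegrees; a zero
 * reading is treated as "sensor not ready" and reported as 0.
 */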

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}
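/*
 * Illustrative sketch (a hypothetical helper, not part of the original
 * driver): a typical read-modify-write of an indirect PCIE register
 * built on the accessors above. The dummy RREG32(PCIE_INDEX) reads in
 * the accessors post the index write before data is touched.
 */
static inline void cik_pciep_rmw_sketch(struct radeon_device *rdev,
					u32 reg, u32 clear, u32 set)
{
	u32 tmp = cik_pciep_rreg(rdev, reg);

	tmp &= ~clear;	/* drop the bits being reprogrammed */
	tmp |= set;	/* and merge in the new ones */
	cik_pciep_wreg(rdev, reg, tmp);
}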

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
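/*
 * Layout note (an informed assumption; these lists are consumed by the
 * RLC ucode, not by driver code): each address entry packs an instance/
 * broadcast selector in the high 16 bits (0x0e00, 0x4e00, ... resemble
 * per-SE/SH GRBM_GFX_INDEX selects) with a register dword offset (byte
 * address >> 2) in the low 16 bits, and each following 0x00000000 is a
 * save slot the RLC fills at save time. A bare small count (0x3, 0x5)
 * appears to introduce a block of entries with different save/restore
 * handling.
 */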

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}
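/*
 * Illustrative sketch (an assumption about the shared helper's behavior,
 * not a copy of it): the golden register tables above are {offset,
 * and_mask, or_value} triples, and radeon_program_register_sequence() is
 * expected to apply them as a read-modify-write pass, roughly like this
 * hypothetical helper:
 */
static inline void cik_apply_golden_triples_sketch(struct radeon_device *rdev,
						   const u32 *regs, u32 count)
{
	u32 i, reg, and_mask, or_mask, tmp;

	for (i = 0; i < count; i += 3) {
		reg = regs[i + 0];
		and_mask = regs[i + 1];
		or_mask = regs[i + 2];

		if (and_mask == 0xffffffff) {
			/* full-width mask: just write the new value */
			tmp = or_mask;
		} else {
			/* otherwise only touch the masked bits */
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}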

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
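/*
 * Illustrative use (hypothetical field names): a compute ring whose
 * write pointer lives behind a doorbell would publish it with something
 * like
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 *
 * instead of writing a GRBM ring register, letting the hardware pick up
 * the new wptr without a privileged register access.
 */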

#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
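/*
 * Usage pattern sketch (an assumption based on how instanced registers
 * are normally handled): callers are expected to serialize on a lock,
 * select the instance, program the instanced registers, then restore
 * the defaults, e.g.:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program per-instance registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */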

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* note: running is known to be 0 here, so this blackout
		 * save (and the matching restore below) is dead code; the
		 * sequencer is only ever reprogrammed while halted.
		 */
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
1708
Alex Deucher8cc1a532013-04-09 12:41:24 -04001709/*
1710 * Core functions
1711 */
1712/**
1713 * cik_tiling_mode_table_init - init the hw tiling table
1714 *
1715 * @rdev: radeon_device pointer
1716 *
1717 * Starting with SI, the tiling setup is done globally in a
1718 * set of 32 tiling modes. Rather than selecting each set of
1719 * parameters per surface as on older asics, we just select
1720 * which index in the tiling table we want to use, and the
1721 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}

/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}

/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
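 * e.g. cik_create_bitmask(4) returns 0xf; for bit_width < 32 this is
 * equivalent to ((1 << bit_width) - 1).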
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}

/**
 * cik_get_rb_disabled - get the disabled RB bitmask
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

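	/* one mask bit for each RB owned by a single SH */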
	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

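	/* enabled_rbs carries 2 bits per SH; each pair selects one of the
	 * RASTER_CONFIG RB mappings below.
	 */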
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
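	/* row size in KB = (4 bytes * 2^(8 + NOOFCOLS)) / 1024 */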
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
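	 * e.g. a 4 tile pipe asic stores 2 in bits 3:0 (see the switch below).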
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}

/*
 * GPU scratch registers helpers function.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}

/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
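			/* 3 dwords for the SET_UCONFIG_REG write below plus
			 * 4 for the INDIRECT_BUFFER packet that follows.
			 */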
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
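			/* 5 dwords for the WRITE_DATA packet below plus
			 * 4 for the INDIRECT_BUFFER packet that follows.
			 */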
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}

/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
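 * (up to 2 MECs * 4 pipes * 8 queues = 64 compute queues in total).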
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	udelay(50);
}

/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

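	/* 17 extra dwords for the init packets emitted below
	 * (4 + 2 + 3 + 2 + 2 + 4).
	 */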
3274 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3275 if (r) {
3276 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3277 return r;
3278 }
3279
3280 /* init the CE partitions. CE only used for gfx on CIK */
3281 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3282 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3283 radeon_ring_write(ring, 0xc000);
3284 radeon_ring_write(ring, 0xc000);
3285
3286 /* setup clear context state */
3287 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3288 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3289
3290 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3291 radeon_ring_write(ring, 0x80000000);
3292 radeon_ring_write(ring, 0x80000000);
3293
3294 for (i = 0; i < cik_default_size; i++)
3295 radeon_ring_write(ring, cik_default_state[i]);
3296
3297 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3298 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3299
3300 /* set clear context state */
3301 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3302 radeon_ring_write(ring, 0);
3303
3304 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3305 radeon_ring_write(ring, 0x00000316);
3306 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3307 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3308
3309 radeon_ring_unlock_commit(rdev, ring);
3310
3311 return 0;
3312}
3313
3314/**
3315 * cik_cp_gfx_fini - stop the gfx ring
3316 *
3317 * @rdev: radeon_device pointer
3318 *
3319 * Stop the gfx ring and tear down the driver ring
3320 * info.
3321 */
3322static void cik_cp_gfx_fini(struct radeon_device *rdev)
3323{
3324 cik_cp_gfx_enable(rdev, false);
3325 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3326}
3327
3328/**
3329 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3330 *
3331 * @rdev: radeon_device pointer
3332 *
3333 * Program the location and size of the gfx ring buffer
3334 * and test it to make sure it's working.
3335 * Returns 0 for success, error for failure.
3336 */
3337static int cik_cp_gfx_resume(struct radeon_device *rdev)
3338{
3339 struct radeon_ring *ring;
3340 u32 tmp;
3341 u32 rb_bufsz;
3342 u64 rb_addr;
3343 int r;
3344
3345 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3346 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3347
3348 /* Set the write pointer delay */
3349 WREG32(CP_RB_WPTR_DELAY, 0);
3350
3351 /* set the RB to use vmid 0 */
3352 WREG32(CP_RB_VMID, 0);
3353
3354 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3355
3356 /* ring 0 - compute and gfx */
3357 /* Set ring buffer size */
3358 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3359 rb_bufsz = drm_order(ring->ring_size / 8);
3360 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3361#ifdef __BIG_ENDIAN
3362 tmp |= BUF_SWAP_32BIT;
3363#endif
3364 WREG32(CP_RB0_CNTL, tmp);
3365
3366 /* Initialize the ring buffer's read and write pointers */
3367 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3368 ring->wptr = 0;
3369 WREG32(CP_RB0_WPTR, ring->wptr);
3370
3371 /* set the wb address wether it's enabled or not */
3372 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3373 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3374
3375 /* scratch register shadowing is no longer supported */
3376 WREG32(SCRATCH_UMSK, 0);
3377
3378 if (!rdev->wb.enabled)
3379 tmp |= RB_NO_UPDATE;
3380
3381 mdelay(1);
3382 WREG32(CP_RB0_CNTL, tmp);
3383
3384 rb_addr = ring->gpu_addr >> 8;
3385 WREG32(CP_RB0_BASE, rb_addr);
3386 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3387
3388 ring->rptr = RREG32(CP_RB0_RPTR);
3389
3390 /* start the ring */
3391 cik_cp_gfx_start(rdev);
3392 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3393 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3394 if (r) {
3395 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3396 return r;
3397 }
3398 return 0;
3399}
3400
Alex Deucher963e81f2013-06-26 17:37:11 -04003401u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3402 struct radeon_ring *ring)
3403{
3404 u32 rptr;
3405
3406
3407
3408 if (rdev->wb.enabled) {
3409 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3410 } else {
Alex Deucherf61d5b462013-08-06 12:40:16 -04003411 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003412 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3413 rptr = RREG32(CP_HQD_PQ_RPTR);
3414 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04003415 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003416 }
3417 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3418
3419 return rptr;
3420}
3421
3422u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3423 struct radeon_ring *ring)
3424{
3425 u32 wptr;
3426
3427 if (rdev->wb.enabled) {
3428 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3429 } else {
Alex Deucherf61d5b462013-08-06 12:40:16 -04003430 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003431 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3432 wptr = RREG32(CP_HQD_PQ_WPTR);
3433 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04003434 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003435 }
3436 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3437
3438 return wptr;
3439}
3440
3441void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3442 struct radeon_ring *ring)
3443{
3444 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
3445
3446 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
3447 WDOORBELL32(ring->doorbell_offset, wptr);
3448}
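
/*
 * A minimal sketch (hypothetical helper) of the mask/shift round trip the
 * three accessors above share: set_wptr stores (wptr << shift) & mask and
 * the getters recover the value with (reg & mask) >> shift, so a pointer
 * survives the trip unchanged as long as it fits in the masked field.
 */
static u32 __maybe_unused cik_example_compute_ptr_roundtrip(struct radeon_ring *ring,
							    u32 wptr)
{
	u32 reg = (wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;

	return (reg & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
}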
3449
Alex Deucher841cf442012-12-18 21:47:44 -05003450/**
3451 * cik_cp_compute_enable - enable/disable the compute CP MEs
3452 *
3453 * @rdev: radeon_device pointer
3454 * @enable: enable or disable the MEs
3455 *
3456 * Halts or unhalts the compute MEs.
3457 */
3458static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3459{
3460 if (enable)
3461 WREG32(CP_MEC_CNTL, 0);
3462 else
3463 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3464 udelay(50);
3465}
3466
3467/**
3468 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3469 *
3470 * @rdev: radeon_device pointer
3471 *
3472 * Loads the compute MEC1&2 ucode.
3473 * Returns 0 for success, -EINVAL if the ucode is not available.
3474 */
3475static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3476{
3477 const __be32 *fw_data;
3478 int i;
3479
3480 if (!rdev->mec_fw)
3481 return -EINVAL;
3482
3483 cik_cp_compute_enable(rdev, false);
3484
3485 /* MEC1 */
3486 fw_data = (const __be32 *)rdev->mec_fw->data;
3487 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3488 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3489 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3490 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3491
3492 if (rdev->family == CHIP_KAVERI) {
3493 /* MEC2 */
3494 fw_data = (const __be32 *)rdev->mec_fw->data;
3495 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3496 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3497 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3498 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3499 }
3500
3501 return 0;
3502}
3503
3504/**
3505 * cik_cp_compute_start - start the compute queues
3506 *
3507 * @rdev: radeon_device pointer
3508 *
3509 * Enable the compute queues.
3510 * Returns 0 for success, error for failure.
3511 */
3512static int cik_cp_compute_start(struct radeon_device *rdev)
3513{
Alex Deucher963e81f2013-06-26 17:37:11 -04003514 cik_cp_compute_enable(rdev, true);
3515
Alex Deucher841cf442012-12-18 21:47:44 -05003516 return 0;
3517}
3518
3519/**
3520 * cik_cp_compute_fini - stop the compute queues
3521 *
3522 * @rdev: radeon_device pointer
3523 *
3524 * Stop the compute queues and tear down the driver queue
3525 * info.
3526 */
3527static void cik_cp_compute_fini(struct radeon_device *rdev)
3528{
Alex Deucher963e81f2013-06-26 17:37:11 -04003529 int i, idx, r;
3530
Alex Deucher841cf442012-12-18 21:47:44 -05003531 cik_cp_compute_enable(rdev, false);
Alex Deucher963e81f2013-06-26 17:37:11 -04003532
3533 for (i = 0; i < 2; i++) {
3534 if (i == 0)
3535 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3536 else
3537 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3538
3539 if (rdev->ring[idx].mqd_obj) {
3540 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3541 if (unlikely(r != 0))
3542 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3543
3544 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3545 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3546
3547 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3548 rdev->ring[idx].mqd_obj = NULL;
3549 }
3550 }
Alex Deucher841cf442012-12-18 21:47:44 -05003551}
3552
Alex Deucher963e81f2013-06-26 17:37:11 -04003553static void cik_mec_fini(struct radeon_device *rdev)
3554{
3555 int r;
3556
3557 if (rdev->mec.hpd_eop_obj) {
3558 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3559 if (unlikely(r != 0))
3560 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3561 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3562 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3563
3564 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3565 rdev->mec.hpd_eop_obj = NULL;
3566 }
3567}
3568
3569#define MEC_HPD_SIZE 2048
3570
3571static int cik_mec_init(struct radeon_device *rdev)
3572{
3573 int r;
3574 u32 *hpd;
3575
3576 /*
3577 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3578 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3579 */
3580 if (rdev->family == CHIP_KAVERI)
3581 rdev->mec.num_mec = 2;
3582 else
3583 rdev->mec.num_mec = 1;
3584 rdev->mec.num_pipe = 4;
3585 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3586
3587 if (rdev->mec.hpd_eop_obj == NULL) {
3588 r = radeon_bo_create(rdev,
3589				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3590 PAGE_SIZE, true,
3591 RADEON_GEM_DOMAIN_GTT, NULL,
3592 &rdev->mec.hpd_eop_obj);
3593 if (r) {
3594			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3595 return r;
3596 }
3597 }
3598
3599 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3600 if (unlikely(r != 0)) {
3601 cik_mec_fini(rdev);
3602 return r;
3603 }
3604 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3605 &rdev->mec.hpd_eop_gpu_addr);
3606 if (r) {
3607		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3608 cik_mec_fini(rdev);
3609 return r;
3610 }
3611 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3612 if (r) {
3613		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3614 cik_mec_fini(rdev);
3615 return r;
3616 }
3617
3618 /* clear memory. Not sure if this is required or not */
3619	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3620
3621 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3622 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3623
3624 return 0;
3625}
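
/*
 * A sketch (hypothetical helper) of the HPD EOP sizing used by
 * cik_mec_init() above: one MEC_HPD_SIZE * 2 byte chunk per hardware
 * pipe.  KV (2 MEC x 4 pipes) needs 2 * 4 * 2048 * 2 = 32 KB;
 * CI/KB (1 MEC x 4 pipes) needs 1 * 4 * 2048 * 2 = 16 KB.
 */
static u32 __maybe_unused cik_example_hpd_eop_bytes(struct radeon_device *rdev)
{
	return rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2;
}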
3626
3627struct hqd_registers
3628{
3629 u32 cp_mqd_base_addr;
3630 u32 cp_mqd_base_addr_hi;
3631 u32 cp_hqd_active;
3632 u32 cp_hqd_vmid;
3633 u32 cp_hqd_persistent_state;
3634 u32 cp_hqd_pipe_priority;
3635 u32 cp_hqd_queue_priority;
3636 u32 cp_hqd_quantum;
3637 u32 cp_hqd_pq_base;
3638 u32 cp_hqd_pq_base_hi;
3639 u32 cp_hqd_pq_rptr;
3640 u32 cp_hqd_pq_rptr_report_addr;
3641 u32 cp_hqd_pq_rptr_report_addr_hi;
3642 u32 cp_hqd_pq_wptr_poll_addr;
3643 u32 cp_hqd_pq_wptr_poll_addr_hi;
3644 u32 cp_hqd_pq_doorbell_control;
3645 u32 cp_hqd_pq_wptr;
3646 u32 cp_hqd_pq_control;
3647 u32 cp_hqd_ib_base_addr;
3648 u32 cp_hqd_ib_base_addr_hi;
3649 u32 cp_hqd_ib_rptr;
3650 u32 cp_hqd_ib_control;
3651 u32 cp_hqd_iq_timer;
3652 u32 cp_hqd_iq_rptr;
3653 u32 cp_hqd_dequeue_request;
3654 u32 cp_hqd_dma_offload;
3655 u32 cp_hqd_sema_cmd;
3656 u32 cp_hqd_msg_type;
3657 u32 cp_hqd_atomic0_preop_lo;
3658 u32 cp_hqd_atomic0_preop_hi;
3659 u32 cp_hqd_atomic1_preop_lo;
3660 u32 cp_hqd_atomic1_preop_hi;
3661 u32 cp_hqd_hq_scheduler0;
3662 u32 cp_hqd_hq_scheduler1;
3663 u32 cp_mqd_control;
3664};
3665
3666struct bonaire_mqd
3667{
3668 u32 header;
3669 u32 dispatch_initiator;
3670 u32 dimensions[3];
3671 u32 start_idx[3];
3672 u32 num_threads[3];
3673 u32 pipeline_stat_enable;
3674 u32 perf_counter_enable;
3675 u32 pgm[2];
3676 u32 tba[2];
3677 u32 tma[2];
3678 u32 pgm_rsrc[2];
3679 u32 vmid;
3680 u32 resource_limits;
3681 u32 static_thread_mgmt01[2];
3682 u32 tmp_ring_size;
3683 u32 static_thread_mgmt23[2];
3684 u32 restart[3];
3685 u32 thread_trace_enable;
3686 u32 reserved1;
3687 u32 user_data[16];
3688 u32 vgtcs_invoke_count[2];
3689 struct hqd_registers queue_state;
3690 u32 dequeue_cntr;
3691 u32 interrupt_queue[64];
3692};
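
/*
 * A minimal sketch of how the MQD above is laid out: the HQD register
 * shadow (struct hqd_registers) sits at a fixed offset inside struct
 * bonaire_mqd, so the CP can fetch the whole descriptor from the MQD bo
 * and locate the queue state.  This helper is hypothetical and purely
 * illustrative; the authoritative layout is the struct definition itself.
 */
static size_t __maybe_unused cik_example_mqd_queue_state_offset(void)
{
	return offsetof(struct bonaire_mqd, queue_state);
}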
3693
Alex Deucher841cf442012-12-18 21:47:44 -05003694/**
3695 * cik_cp_compute_resume - setup the compute queue registers
3696 *
3697 * @rdev: radeon_device pointer
3698 *
3699 * Program the compute queues and test them to make sure they
3700 * are working.
3701 * Returns 0 for success, error for failure.
3702 */
3703static int cik_cp_compute_resume(struct radeon_device *rdev)
3704{
Alex Deucher963e81f2013-06-26 17:37:11 -04003705 int r, i, idx;
3706 u32 tmp;
3707 bool use_doorbell = true;
3708 u64 hqd_gpu_addr;
3709 u64 mqd_gpu_addr;
3710 u64 eop_gpu_addr;
3711 u64 wb_gpu_addr;
3712 u32 *buf;
3713 struct bonaire_mqd *mqd;
Alex Deucher841cf442012-12-18 21:47:44 -05003714
Alex Deucher841cf442012-12-18 21:47:44 -05003715 r = cik_cp_compute_start(rdev);
3716 if (r)
3717 return r;
Alex Deucher963e81f2013-06-26 17:37:11 -04003718
3719 /* fix up chicken bits */
3720 tmp = RREG32(CP_CPF_DEBUG);
3721 tmp |= (1 << 23);
3722 WREG32(CP_CPF_DEBUG, tmp);
3723
3724 /* init the pipes */
Alex Deucherf61d5b462013-08-06 12:40:16 -04003725 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003726 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3727 int me = (i < 4) ? 1 : 2;
3728 int pipe = (i < 4) ? i : (i - 4);
3729
3730 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3731
3732 cik_srbm_select(rdev, me, pipe, 0, 0);
3733
3734 /* write the EOP addr */
3735 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3736 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3737
3738 /* set the VMID assigned */
3739 WREG32(CP_HPD_EOP_VMID, 0);
3740
3741 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3742 tmp = RREG32(CP_HPD_EOP_CONTROL);
3743 tmp &= ~EOP_SIZE_MASK;
3744 tmp |= drm_order(MEC_HPD_SIZE / 8);
3745 WREG32(CP_HPD_EOP_CONTROL, tmp);
3746 }
3747 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04003748 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003749
3750 /* init the queues. Just two for now. */
3751 for (i = 0; i < 2; i++) {
3752 if (i == 0)
3753 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3754 else
3755 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3756
3757 if (rdev->ring[idx].mqd_obj == NULL) {
3758 r = radeon_bo_create(rdev,
3759 sizeof(struct bonaire_mqd),
3760 PAGE_SIZE, true,
3761 RADEON_GEM_DOMAIN_GTT, NULL,
3762 &rdev->ring[idx].mqd_obj);
3763 if (r) {
3764 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3765 return r;
3766 }
3767 }
3768
3769 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3770 if (unlikely(r != 0)) {
3771 cik_cp_compute_fini(rdev);
3772 return r;
3773 }
3774 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3775 &mqd_gpu_addr);
3776 if (r) {
3777 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3778 cik_cp_compute_fini(rdev);
3779 return r;
3780 }
3781 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3782 if (r) {
3783 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3784 cik_cp_compute_fini(rdev);
3785 return r;
3786 }
3787
3788 /* doorbell offset */
3789 rdev->ring[idx].doorbell_offset =
3790 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3791
3792 /* init the mqd struct */
3793 memset(buf, 0, sizeof(struct bonaire_mqd));
3794
3795 mqd = (struct bonaire_mqd *)buf;
3796 mqd->header = 0xC0310800;
3797 mqd->static_thread_mgmt01[0] = 0xffffffff;
3798 mqd->static_thread_mgmt01[1] = 0xffffffff;
3799 mqd->static_thread_mgmt23[0] = 0xffffffff;
3800 mqd->static_thread_mgmt23[1] = 0xffffffff;
3801
Alex Deucherf61d5b462013-08-06 12:40:16 -04003802 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003803 cik_srbm_select(rdev, rdev->ring[idx].me,
3804 rdev->ring[idx].pipe,
3805 rdev->ring[idx].queue, 0);
3806
3807 /* disable wptr polling */
3808 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3809 tmp &= ~WPTR_POLL_EN;
3810 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3811
3812 /* enable doorbell? */
3813 mqd->queue_state.cp_hqd_pq_doorbell_control =
3814 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3815 if (use_doorbell)
3816 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3817 else
3818 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3819 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3820 mqd->queue_state.cp_hqd_pq_doorbell_control);
3821
3822 /* disable the queue if it's active */
3823 mqd->queue_state.cp_hqd_dequeue_request = 0;
3824 mqd->queue_state.cp_hqd_pq_rptr = 0;
3825		mqd->queue_state.cp_hqd_pq_wptr = 0;
3826 if (RREG32(CP_HQD_ACTIVE) & 1) {
3827 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3828 for (i = 0; i < rdev->usec_timeout; i++) {
3829 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3830 break;
3831 udelay(1);
3832 }
3833 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3834 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3835 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3836 }
3837
3838 /* set the pointer to the MQD */
3839 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3840 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3841 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3842 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3843 /* set MQD vmid to 0 */
3844 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3845 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3846 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3847
3848		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3849 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3850 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3851 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3852 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3853 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3854
3855 /* set up the HQD, this is similar to CP_RB0_CNTL */
3856 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3857 mqd->queue_state.cp_hqd_pq_control &=
3858 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3859
3860 mqd->queue_state.cp_hqd_pq_control |=
3861 drm_order(rdev->ring[idx].ring_size / 8);
3862 mqd->queue_state.cp_hqd_pq_control |=
3863 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3864#ifdef __BIG_ENDIAN
3865 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3866#endif
3867 mqd->queue_state.cp_hqd_pq_control &=
3868 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3869 mqd->queue_state.cp_hqd_pq_control |=
3870 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3871 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3872
3873 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3874 if (i == 0)
3875 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3876 else
3877 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3878 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3879 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3880 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3881 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3882 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3883
3884		/* set the wb address whether it's enabled or not */
3885 if (i == 0)
3886 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3887 else
3888 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3889 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3890 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3891 upper_32_bits(wb_gpu_addr) & 0xffff;
3892 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3893 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3894 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3895 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3896
3897 /* enable the doorbell if requested */
3898 if (use_doorbell) {
3899 mqd->queue_state.cp_hqd_pq_doorbell_control =
3900 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3901 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3902 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3903 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3904 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3905 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3906 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3907
3908 } else {
3909 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3910 }
3911 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3912 mqd->queue_state.cp_hqd_pq_doorbell_control);
3913
3914 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3915 rdev->ring[idx].wptr = 0;
3916 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3917 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3918 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3919 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3920
3921 /* set the vmid for the queue */
3922 mqd->queue_state.cp_hqd_vmid = 0;
3923 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3924
3925 /* activate the queue */
3926 mqd->queue_state.cp_hqd_active = 1;
3927 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3928
3929 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04003930 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003931
3932 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3933 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3934
3935 rdev->ring[idx].ready = true;
3936 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3937 if (r)
3938 rdev->ring[idx].ready = false;
3939 }
3940
Alex Deucher841cf442012-12-18 21:47:44 -05003941 return 0;
3942}
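
/*
 * A minimal sketch (hypothetical helper) of the doorbell math used in
 * cik_cp_compute_resume() above: each ring gets a byte offset of
 * doorbell_page_num * PAGE_SIZE into the doorbell aperture, and
 * DOORBELL_OFFSET() takes that offset in dwords, hence the divide by 4.
 */
static u32 __maybe_unused cik_example_doorbell_dw_offset(struct radeon_ring *ring)
{
	u32 byte_offset = ring->doorbell_page_num * PAGE_SIZE;

	return byte_offset / 4; /* value handed to DOORBELL_OFFSET() */
}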
3943
Alex Deucher841cf442012-12-18 21:47:44 -05003944static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3945{
3946 cik_cp_gfx_enable(rdev, enable);
3947 cik_cp_compute_enable(rdev, enable);
3948}
3949
Alex Deucher841cf442012-12-18 21:47:44 -05003950static int cik_cp_load_microcode(struct radeon_device *rdev)
3951{
3952 int r;
3953
3954 r = cik_cp_gfx_load_microcode(rdev);
3955 if (r)
3956 return r;
3957 r = cik_cp_compute_load_microcode(rdev);
3958 if (r)
3959 return r;
3960
3961 return 0;
3962}
3963
Alex Deucher841cf442012-12-18 21:47:44 -05003964static void cik_cp_fini(struct radeon_device *rdev)
3965{
3966 cik_cp_gfx_fini(rdev);
3967 cik_cp_compute_fini(rdev);
3968}
3969
Alex Deucher841cf442012-12-18 21:47:44 -05003970static int cik_cp_resume(struct radeon_device *rdev)
3971{
3972 int r;
3973
3974 /* Reset all cp blocks */
3975 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3976 RREG32(GRBM_SOFT_RESET);
3977 mdelay(15);
3978 WREG32(GRBM_SOFT_RESET, 0);
3979 RREG32(GRBM_SOFT_RESET);
3980
3981 r = cik_cp_load_microcode(rdev);
3982 if (r)
3983 return r;
3984
3985 r = cik_cp_gfx_resume(rdev);
3986 if (r)
3987 return r;
3988 r = cik_cp_compute_resume(rdev);
3989 if (r)
3990 return r;
3991
3992 return 0;
3993}
3994
Alex Deucher21a93e12013-04-09 12:47:11 -04003995/*
3996 * sDMA - System DMA
3997 * Starting with CIK, the GPU has new asynchronous
3998 * DMA engines. These engines are used for compute
3999 * and gfx. There are two DMA engines (SDMA0, SDMA1)
4000 * and each one supports 1 ring buffer used for gfx
4001 * and 2 queues used for compute.
4002 *
4003 * The programming model is very similar to the CP
4004 * (ring buffer, IBs, etc.), but sDMA has its own
4005 * packet format that is different from the PM4 format
4006 * used by the CP. sDMA supports copying data, writing
4007 * embedded data, solid fills, and a number of other
4008 * things. It also has support for tiling/detiling of
4009 * buffers.
4010 */
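
/*
 * A minimal sketch of the sDMA packet encoding used throughout this
 * file: SDMA_PACKET() packs opcode, sub-opcode and extra bits into the
 * header dword, followed by the packet payload.  This hypothetical
 * helper emits the same 5-dword linear write that cik_sdma_ring_test()
 * below uses: header, address low/high, dword count, data.
 */
static void __maybe_unused cik_example_sdma_write_dword(struct radeon_ring *ring,
							u64 gpu_addr, u32 data)
{
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
					    SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, gpu_addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, data);
}
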
4011/**
4012 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
4013 *
4014 * @rdev: radeon_device pointer
4015 * @ib: IB object to schedule
4016 *
4017 * Schedule an IB in the DMA ring (CIK).
4018 */
4019void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
4020 struct radeon_ib *ib)
4021{
4022 struct radeon_ring *ring = &rdev->ring[ib->ring];
4023 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
4024
4025 if (rdev->wb.enabled) {
4026 u32 next_rptr = ring->wptr + 5;
4027 while ((next_rptr & 7) != 4)
4028 next_rptr++;
4029 next_rptr += 4;
4030 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4031 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4032 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4033 radeon_ring_write(ring, 1); /* number of DWs to follow */
4034 radeon_ring_write(ring, next_rptr);
4035 }
4036
4037	/* IB packet must end on an 8 DW boundary */
4038 while ((ring->wptr & 7) != 4)
4039 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4040 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
4041 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
4042 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
4043 radeon_ring_write(ring, ib->length_dw);
4044
4046
4047/**
4048 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
4049 *
4050 * @rdev: radeon_device pointer
4051 * @fence: radeon fence object
4052 *
4053 * Add a DMA fence packet to the ring to write
4054 * the fence seq number and a DMA trap packet to generate
4055 * an interrupt if needed (CIK).
4056 */
4057void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
4058 struct radeon_fence *fence)
4059{
4060 struct radeon_ring *ring = &rdev->ring[fence->ring];
4061 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4062 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4063 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4064 u32 ref_and_mask;
4065
4066 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
4067 ref_and_mask = SDMA0;
4068 else
4069 ref_and_mask = SDMA1;
4070
4071 /* write the fence */
4072 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
4073 radeon_ring_write(ring, addr & 0xffffffff);
4074 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4075 radeon_ring_write(ring, fence->seq);
4076 /* generate an interrupt */
4077 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
4078 /* flush HDP */
4079 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4080 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4081 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4082 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4083 radeon_ring_write(ring, ref_and_mask); /* MASK */
4084 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4085}
4086
4087/**
4088 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
4089 *
4090 * @rdev: radeon_device pointer
4091 * @ring: radeon_ring structure holding ring information
4092 * @semaphore: radeon semaphore object
4093 * @emit_wait: wait or signal semaphore
4094 *
4095 * Add a DMA semaphore packet to the ring to wait on or signal
4096 * other rings (CIK).
4097 */
4098void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
4099 struct radeon_ring *ring,
4100 struct radeon_semaphore *semaphore,
4101 bool emit_wait)
4102{
4103 u64 addr = semaphore->gpu_addr;
4104 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
4105
4106 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
4107 radeon_ring_write(ring, addr & 0xfffffff8);
4108 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4109}
4110
4111/**
4112 * cik_sdma_gfx_stop - stop the gfx async dma engines
4113 *
4114 * @rdev: radeon_device pointer
4115 *
4116 * Stop the gfx async dma ring buffers (CIK).
4117 */
4118static void cik_sdma_gfx_stop(struct radeon_device *rdev)
4119{
4120 u32 rb_cntl, reg_offset;
4121 int i;
4122
4123 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4124
4125 for (i = 0; i < 2; i++) {
4126 if (i == 0)
4127 reg_offset = SDMA0_REGISTER_OFFSET;
4128 else
4129 reg_offset = SDMA1_REGISTER_OFFSET;
4130 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
4131 rb_cntl &= ~SDMA_RB_ENABLE;
4132 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4133 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
4134 }
4135}
4136
4137/**
4138 * cik_sdma_rlc_stop - stop the compute async dma engines
4139 *
4140 * @rdev: radeon_device pointer
4141 *
4142 * Stop the compute async dma queues (CIK).
4143 */
4144static void cik_sdma_rlc_stop(struct radeon_device *rdev)
4145{
4146 /* XXX todo */
4147}
4148
4149/**
4150 * cik_sdma_enable - enable/disable the async dma engines
4151 *
4152 * @rdev: radeon_device pointer
4153 * @enable: enable/disable the DMA MEs.
4154 *
4155 * Halt or unhalt the async dma engines (CIK).
4156 */
4157static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
4158{
4159 u32 me_cntl, reg_offset;
4160 int i;
4161
4162 for (i = 0; i < 2; i++) {
4163 if (i == 0)
4164 reg_offset = SDMA0_REGISTER_OFFSET;
4165 else
4166 reg_offset = SDMA1_REGISTER_OFFSET;
4167 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
4168 if (enable)
4169 me_cntl &= ~SDMA_HALT;
4170 else
4171 me_cntl |= SDMA_HALT;
4172 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
4173 }
4174}
4175
4176/**
4177 * cik_sdma_gfx_resume - setup and start the async dma engines
4178 *
4179 * @rdev: radeon_device pointer
4180 *
4181 * Set up the gfx DMA ring buffers and enable them (CIK).
4182 * Returns 0 for success, error for failure.
4183 */
4184static int cik_sdma_gfx_resume(struct radeon_device *rdev)
4185{
4186 struct radeon_ring *ring;
4187 u32 rb_cntl, ib_cntl;
4188 u32 rb_bufsz;
4189 u32 reg_offset, wb_offset;
4190 int i, r;
4191
4192 for (i = 0; i < 2; i++) {
4193 if (i == 0) {
4194 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4195 reg_offset = SDMA0_REGISTER_OFFSET;
4196 wb_offset = R600_WB_DMA_RPTR_OFFSET;
4197 } else {
4198 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4199 reg_offset = SDMA1_REGISTER_OFFSET;
4200 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
4201 }
4202
4203 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
4204 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
4205
4206 /* Set ring buffer size in dwords */
4207 rb_bufsz = drm_order(ring->ring_size / 4);
4208 rb_cntl = rb_bufsz << 1;
4209#ifdef __BIG_ENDIAN
4210 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
4211#endif
4212 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4213
4214 /* Initialize the ring buffer's read and write pointers */
4215 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
4216 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
4217
4218 /* set the wb address whether it's enabled or not */
4219 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
4220 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
4221 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
4222 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
4223
4224 if (rdev->wb.enabled)
4225 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
4226
4227 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
4228 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
4229
4230 ring->wptr = 0;
4231 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
4232
4233 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
4234
4235 /* enable DMA RB */
4236 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
4237
4238 ib_cntl = SDMA_IB_ENABLE;
4239#ifdef __BIG_ENDIAN
4240 ib_cntl |= SDMA_IB_SWAP_ENABLE;
4241#endif
4242 /* enable DMA IBs */
4243 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
4244
4245 ring->ready = true;
4246
4247 r = radeon_ring_test(rdev, ring->idx, ring);
4248 if (r) {
4249 ring->ready = false;
4250 return r;
4251 }
4252 }
4253
4254 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4255
4256 return 0;
4257}
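
/*
 * A minimal sketch (hypothetical helper) of the SDMA0_GFX_RB_CNTL size
 * encoding used above: the ring size is stored as log2(dwords), shifted
 * up by one bit.  E.g. a 64 KB ring: drm_order(0x10000 / 4) = 14, so
 * rb_cntl starts as 14 << 1 before the enable/writeback bits are ORed in.
 */
static u32 __maybe_unused cik_example_sdma_rb_cntl(u32 ring_size_bytes)
{
	u32 rb_cntl = drm_order(ring_size_bytes / 4) << 1;

#ifdef __BIG_ENDIAN
	rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
	return rb_cntl;
}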
4258
4259/**
4260 * cik_sdma_rlc_resume - setup and start the async dma engines
4261 *
4262 * @rdev: radeon_device pointer
4263 *
4264 * Set up the compute DMA queues and enable them (CIK).
4265 * Returns 0 for success, error for failure.
4266 */
4267static int cik_sdma_rlc_resume(struct radeon_device *rdev)
4268{
4269 /* XXX todo */
4270 return 0;
4271}
4272
4273/**
4274 * cik_sdma_load_microcode - load the sDMA ME ucode
4275 *
4276 * @rdev: radeon_device pointer
4277 *
4278 * Loads the sDMA0/1 ucode.
4279 * Returns 0 for success, -EINVAL if the ucode is not available.
4280 */
4281static int cik_sdma_load_microcode(struct radeon_device *rdev)
4282{
4283 const __be32 *fw_data;
4284 int i;
4285
4286 if (!rdev->sdma_fw)
4287 return -EINVAL;
4288
4289 /* stop the gfx rings and rlc compute queues */
4290 cik_sdma_gfx_stop(rdev);
4291 cik_sdma_rlc_stop(rdev);
4292
4293 /* halt the MEs */
4294 cik_sdma_enable(rdev, false);
4295
4296 /* sdma0 */
4297 fw_data = (const __be32 *)rdev->sdma_fw->data;
4298 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4299 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4300 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4301 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4302
4303 /* sdma1 */
4304 fw_data = (const __be32 *)rdev->sdma_fw->data;
4305 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4306 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4307 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4308 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4309
4310 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4311 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4312 return 0;
4313}
4314
4315/**
4316 * cik_sdma_resume - setup and start the async dma engines
4317 *
4318 * @rdev: radeon_device pointer
4319 *
4320 * Set up the DMA engines and enable them (CIK).
4321 * Returns 0 for success, error for failure.
4322 */
4323static int cik_sdma_resume(struct radeon_device *rdev)
4324{
4325 int r;
4326
4327 /* Reset dma */
4328 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
4329 RREG32(SRBM_SOFT_RESET);
4330 udelay(50);
4331 WREG32(SRBM_SOFT_RESET, 0);
4332 RREG32(SRBM_SOFT_RESET);
4333
4334 r = cik_sdma_load_microcode(rdev);
4335 if (r)
4336 return r;
4337
4338 /* unhalt the MEs */
4339 cik_sdma_enable(rdev, true);
4340
4341 /* start the gfx rings and rlc compute queues */
4342 r = cik_sdma_gfx_resume(rdev);
4343 if (r)
4344 return r;
4345 r = cik_sdma_rlc_resume(rdev);
4346 if (r)
4347 return r;
4348
4349 return 0;
4350}
4351
4352/**
4353 * cik_sdma_fini - tear down the async dma engines
4354 *
4355 * @rdev: radeon_device pointer
4356 *
4357 * Stop the async dma engines and free the rings (CIK).
4358 */
4359static void cik_sdma_fini(struct radeon_device *rdev)
4360{
4361 /* stop the gfx rings and rlc compute queues */
4362 cik_sdma_gfx_stop(rdev);
4363 cik_sdma_rlc_stop(rdev);
4364 /* halt the MEs */
4365 cik_sdma_enable(rdev, false);
4366 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
4367 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
4368 /* XXX - compute dma queue tear down */
4369}
4370
4371/**
4372 * cik_copy_dma - copy pages using the DMA engine
4373 *
4374 * @rdev: radeon_device pointer
4375 * @src_offset: src GPU address
4376 * @dst_offset: dst GPU address
4377 * @num_gpu_pages: number of GPU pages to xfer
4378 * @fence: radeon fence object
4379 *
4380 * Copy GPU pages using the DMA engine (CIK).
4381 * Used by the radeon ttm implementation to move pages if
4382 * registered as the asic copy callback.
4383 */
4384int cik_copy_dma(struct radeon_device *rdev,
4385 uint64_t src_offset, uint64_t dst_offset,
4386 unsigned num_gpu_pages,
4387 struct radeon_fence **fence)
4388{
4389 struct radeon_semaphore *sem = NULL;
4390 int ring_index = rdev->asic->copy.dma_ring_index;
4391 struct radeon_ring *ring = &rdev->ring[ring_index];
4392 u32 size_in_bytes, cur_size_in_bytes;
4393 int i, num_loops;
4394 int r = 0;
4395
4396 r = radeon_semaphore_create(rdev, &sem);
4397 if (r) {
4398 DRM_ERROR("radeon: moving bo (%d).\n", r);
4399 return r;
4400 }
4401
4402 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4403 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4404 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
4405 if (r) {
4406 DRM_ERROR("radeon: moving bo (%d).\n", r);
4407 radeon_semaphore_free(rdev, &sem, NULL);
4408 return r;
4409 }
4410
4411 if (radeon_fence_need_sync(*fence, ring->idx)) {
4412 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4413 ring->idx);
4414 radeon_fence_note_sync(*fence, ring->idx);
4415 } else {
4416 radeon_semaphore_free(rdev, &sem, NULL);
4417 }
4418
4419 for (i = 0; i < num_loops; i++) {
4420 cur_size_in_bytes = size_in_bytes;
4421 if (cur_size_in_bytes > 0x1fffff)
4422 cur_size_in_bytes = 0x1fffff;
4423 size_in_bytes -= cur_size_in_bytes;
4424 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
4425 radeon_ring_write(ring, cur_size_in_bytes);
4426 radeon_ring_write(ring, 0); /* src/dst endian swap */
4427 radeon_ring_write(ring, src_offset & 0xffffffff);
4428 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
4429 radeon_ring_write(ring, dst_offset & 0xfffffffc);
4430 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
4431 src_offset += cur_size_in_bytes;
4432 dst_offset += cur_size_in_bytes;
4433 }
4434
4435 r = radeon_fence_emit(rdev, fence, ring->idx);
4436 if (r) {
4437 radeon_ring_unlock_undo(rdev, ring);
4438 return r;
4439 }
4440
4441 radeon_ring_unlock_commit(rdev, ring);
4442 radeon_semaphore_free(rdev, &sem, *fence);
4443
4444 return r;
4445}
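
/*
 * A sketch of the chunking math in cik_copy_dma() above: one COPY
 * packet moves at most 0x1fffff bytes, so e.g. an 8 MB transfer needs
 * DIV_ROUND_UP(0x800000, 0x1fffff) = 5 packets.  This hypothetical
 * helper returns the worst-case ring space the function reserves:
 * 7 dwords per packet plus 14 dwords of semaphore/fence overhead.
 */
static unsigned __maybe_unused cik_example_dma_copy_ndw(unsigned num_gpu_pages)
{
	u32 size_in_bytes = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;

	return DIV_ROUND_UP(size_in_bytes, 0x1fffff) * 7 + 14;
}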
4446
4447/**
4448 * cik_sdma_ring_test - simple async dma engine test
4449 *
4450 * @rdev: radeon_device pointer
4451 * @ring: radeon_ring structure holding ring information
4452 *
4453 * Test the DMA engine by using it to write a value
4454 * to memory (CIK).
4455 * Returns 0 for success, error for failure.
4456 */
4457int cik_sdma_ring_test(struct radeon_device *rdev,
4458 struct radeon_ring *ring)
4459{
4460 unsigned i;
4461 int r;
4462 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4463 u32 tmp;
4464
4465 if (!ptr) {
4466 DRM_ERROR("invalid vram scratch pointer\n");
4467 return -EINVAL;
4468 }
4469
4470 tmp = 0xCAFEDEAD;
4471 writel(tmp, ptr);
4472
4473 r = radeon_ring_lock(rdev, ring, 4);
4474 if (r) {
4475 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
4476 return r;
4477 }
4478 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4479 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
4480 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
4481 radeon_ring_write(ring, 1); /* number of DWs to follow */
4482 radeon_ring_write(ring, 0xDEADBEEF);
4483 radeon_ring_unlock_commit(rdev, ring);
4484
4485 for (i = 0; i < rdev->usec_timeout; i++) {
4486 tmp = readl(ptr);
4487 if (tmp == 0xDEADBEEF)
4488 break;
4489 DRM_UDELAY(1);
4490 }
4491
4492 if (i < rdev->usec_timeout) {
4493 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
4494 } else {
4495 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
4496 ring->idx, tmp);
4497 r = -EINVAL;
4498 }
4499 return r;
4500}
4501
4502/**
4503 * cik_sdma_ib_test - test an IB on the DMA engine
4504 *
4505 * @rdev: radeon_device pointer
4506 * @ring: radeon_ring structure holding ring information
4507 *
4508 * Test a simple IB in the DMA ring (CIK).
4509 * Returns 0 on success, error on failure.
4510 */
4511int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4512{
4513 struct radeon_ib ib;
4514 unsigned i;
4515 int r;
4516 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4517 u32 tmp = 0;
4518
4519 if (!ptr) {
4520 DRM_ERROR("invalid vram scratch pointer\n");
4521 return -EINVAL;
4522 }
4523
4524 tmp = 0xCAFEDEAD;
4525 writel(tmp, ptr);
4526
4527 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4528 if (r) {
4529 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4530 return r;
4531 }
4532
4533 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4534 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
4535 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
4536 ib.ptr[3] = 1;
4537 ib.ptr[4] = 0xDEADBEEF;
4538 ib.length_dw = 5;
4539
4540 r = radeon_ib_schedule(rdev, &ib, NULL);
4541 if (r) {
4542 radeon_ib_free(rdev, &ib);
4543 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4544 return r;
4545 }
4546 r = radeon_fence_wait(ib.fence, false);
4547 if (r) {
4548 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4549 return r;
4550 }
4551 for (i = 0; i < rdev->usec_timeout; i++) {
4552 tmp = readl(ptr);
4553 if (tmp == 0xDEADBEEF)
4554 break;
4555 DRM_UDELAY(1);
4556 }
4557 if (i < rdev->usec_timeout) {
4558 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4559 } else {
4560 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
4561 r = -EINVAL;
4562 }
4563 radeon_ib_free(rdev, &ib);
4564 return r;
4565}
Alex Deuchercc066712013-04-09 12:59:51 -04004567
4568static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4569{
4570 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4571 RREG32(GRBM_STATUS));
4572 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4573 RREG32(GRBM_STATUS2));
4574 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4575 RREG32(GRBM_STATUS_SE0));
4576 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4577 RREG32(GRBM_STATUS_SE1));
4578 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4579 RREG32(GRBM_STATUS_SE2));
4580 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4581 RREG32(GRBM_STATUS_SE3));
4582 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4583 RREG32(SRBM_STATUS));
4584 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4585 RREG32(SRBM_STATUS2));
4586 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4587 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4588 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4589 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
Alex Deucher963e81f2013-06-26 17:37:11 -04004590 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4591 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4592 RREG32(CP_STALLED_STAT1));
4593 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4594 RREG32(CP_STALLED_STAT2));
4595 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4596 RREG32(CP_STALLED_STAT3));
4597 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4598 RREG32(CP_CPF_BUSY_STAT));
4599 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4600 RREG32(CP_CPF_STALLED_STAT1));
4601 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4602 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4603 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4604 RREG32(CP_CPC_STALLED_STAT1));
4605 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
Alex Deuchercc066712013-04-09 12:59:51 -04004606}
4607
Alex Deucher6f2043c2013-04-09 12:43:41 -04004608/**
Alex Deuchercc066712013-04-09 12:59:51 -04004609 * cik_gpu_check_soft_reset - check which blocks are busy
4610 *
4611 * @rdev: radeon_device pointer
4612 *
4613 * Check which blocks are busy and return the relevant reset
4614 * mask to be used by cik_gpu_soft_reset().
4615 * Returns a mask of the blocks to be reset.
4616 */
4617static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4618{
4619 u32 reset_mask = 0;
4620 u32 tmp;
4621
4622 /* GRBM_STATUS */
4623 tmp = RREG32(GRBM_STATUS);
4624 if (tmp & (PA_BUSY | SC_BUSY |
4625 BCI_BUSY | SX_BUSY |
4626 TA_BUSY | VGT_BUSY |
4627 DB_BUSY | CB_BUSY |
4628 GDS_BUSY | SPI_BUSY |
4629 IA_BUSY | IA_BUSY_NO_DMA))
4630 reset_mask |= RADEON_RESET_GFX;
4631
4632 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4633 reset_mask |= RADEON_RESET_CP;
4634
4635 /* GRBM_STATUS2 */
4636 tmp = RREG32(GRBM_STATUS2);
4637 if (tmp & RLC_BUSY)
4638 reset_mask |= RADEON_RESET_RLC;
4639
4640 /* SDMA0_STATUS_REG */
4641 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4642 if (!(tmp & SDMA_IDLE))
4643 reset_mask |= RADEON_RESET_DMA;
4644
4645 /* SDMA1_STATUS_REG */
4646 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4647 if (!(tmp & SDMA_IDLE))
4648 reset_mask |= RADEON_RESET_DMA1;
4649
4650 /* SRBM_STATUS2 */
4651 tmp = RREG32(SRBM_STATUS2);
4652 if (tmp & SDMA_BUSY)
4653 reset_mask |= RADEON_RESET_DMA;
4654
4655 if (tmp & SDMA1_BUSY)
4656 reset_mask |= RADEON_RESET_DMA1;
4657
4658 /* SRBM_STATUS */
4659 tmp = RREG32(SRBM_STATUS);
4660
4661 if (tmp & IH_BUSY)
4662 reset_mask |= RADEON_RESET_IH;
4663
4664 if (tmp & SEM_BUSY)
4665 reset_mask |= RADEON_RESET_SEM;
4666
4667 if (tmp & GRBM_RQ_PENDING)
4668 reset_mask |= RADEON_RESET_GRBM;
4669
4670 if (tmp & VMC_BUSY)
4671 reset_mask |= RADEON_RESET_VMC;
4672
4673 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4674 MCC_BUSY | MCD_BUSY))
4675 reset_mask |= RADEON_RESET_MC;
4676
4677 if (evergreen_is_display_hung(rdev))
4678 reset_mask |= RADEON_RESET_DISPLAY;
4679
4680	/* Skip MC reset as it's most likely not hung, just busy */
4681 if (reset_mask & RADEON_RESET_MC) {
4682 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4683 reset_mask &= ~RADEON_RESET_MC;
4684 }
4685
4686 return reset_mask;
4687}
4688
4689/**
4690 * cik_gpu_soft_reset - soft reset GPU
4691 *
4692 * @rdev: radeon_device pointer
4693 * @reset_mask: mask of which blocks to reset
4694 *
4695 * Soft reset the blocks specified in @reset_mask.
4696 */
4697static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4698{
4699 struct evergreen_mc_save save;
4700 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4701 u32 tmp;
4702
4703 if (reset_mask == 0)
4704 return;
4705
4706 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4707
4708 cik_print_gpu_status_regs(rdev);
4709 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4710 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4711 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4712 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4713
4714 /* stop the rlc */
4715 cik_rlc_stop(rdev);
4716
4717 /* Disable GFX parsing/prefetching */
4718 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4719
4720 /* Disable MEC parsing/prefetching */
4721 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4722
4723 if (reset_mask & RADEON_RESET_DMA) {
4724 /* sdma0 */
4725 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4726 tmp |= SDMA_HALT;
4727 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4728 }
4729 if (reset_mask & RADEON_RESET_DMA1) {
4730 /* sdma1 */
4731 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4732 tmp |= SDMA_HALT;
4733 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4734 }
4735
4736 evergreen_mc_stop(rdev, &save);
4737 if (evergreen_mc_wait_for_idle(rdev)) {
4738		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4739 }
4740
4741 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4742 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4743
4744 if (reset_mask & RADEON_RESET_CP) {
4745 grbm_soft_reset |= SOFT_RESET_CP;
4746
4747 srbm_soft_reset |= SOFT_RESET_GRBM;
4748 }
4749
4750 if (reset_mask & RADEON_RESET_DMA)
4751 srbm_soft_reset |= SOFT_RESET_SDMA;
4752
4753 if (reset_mask & RADEON_RESET_DMA1)
4754 srbm_soft_reset |= SOFT_RESET_SDMA1;
4755
4756 if (reset_mask & RADEON_RESET_DISPLAY)
4757 srbm_soft_reset |= SOFT_RESET_DC;
4758
4759 if (reset_mask & RADEON_RESET_RLC)
4760 grbm_soft_reset |= SOFT_RESET_RLC;
4761
4762 if (reset_mask & RADEON_RESET_SEM)
4763 srbm_soft_reset |= SOFT_RESET_SEM;
4764
4765 if (reset_mask & RADEON_RESET_IH)
4766 srbm_soft_reset |= SOFT_RESET_IH;
4767
4768 if (reset_mask & RADEON_RESET_GRBM)
4769 srbm_soft_reset |= SOFT_RESET_GRBM;
4770
4771 if (reset_mask & RADEON_RESET_VMC)
4772 srbm_soft_reset |= SOFT_RESET_VMC;
4773
4774 if (!(rdev->flags & RADEON_IS_IGP)) {
4775 if (reset_mask & RADEON_RESET_MC)
4776 srbm_soft_reset |= SOFT_RESET_MC;
4777 }
4778
4779 if (grbm_soft_reset) {
4780 tmp = RREG32(GRBM_SOFT_RESET);
4781 tmp |= grbm_soft_reset;
4782 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4783 WREG32(GRBM_SOFT_RESET, tmp);
4784 tmp = RREG32(GRBM_SOFT_RESET);
4785
4786 udelay(50);
4787
4788 tmp &= ~grbm_soft_reset;
4789 WREG32(GRBM_SOFT_RESET, tmp);
4790 tmp = RREG32(GRBM_SOFT_RESET);
4791 }
4792
4793 if (srbm_soft_reset) {
4794 tmp = RREG32(SRBM_SOFT_RESET);
4795 tmp |= srbm_soft_reset;
4796 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4797 WREG32(SRBM_SOFT_RESET, tmp);
4798 tmp = RREG32(SRBM_SOFT_RESET);
4799
4800 udelay(50);
4801
4802 tmp &= ~srbm_soft_reset;
4803 WREG32(SRBM_SOFT_RESET, tmp);
4804 tmp = RREG32(SRBM_SOFT_RESET);
4805 }
4806
4807 /* Wait a little for things to settle down */
4808 udelay(50);
4809
4810 evergreen_mc_resume(rdev, &save);
4811 udelay(50);
4812
4813 cik_print_gpu_status_regs(rdev);
4814}
4815
4816/**
4817 * cik_asic_reset - soft reset GPU
4818 *
4819 * @rdev: radeon_device pointer
4820 *
4821 * Look up which blocks are hung and attempt
4822 * to reset them.
4823 * Returns 0 for success.
4824 */
4825int cik_asic_reset(struct radeon_device *rdev)
4826{
4827 u32 reset_mask;
4828
4829 reset_mask = cik_gpu_check_soft_reset(rdev);
4830
4831 if (reset_mask)
4832 r600_set_bios_scratch_engine_hung(rdev, true);
4833
4834 cik_gpu_soft_reset(rdev, reset_mask);
4835
4836 reset_mask = cik_gpu_check_soft_reset(rdev);
4837
4838 if (!reset_mask)
4839 r600_set_bios_scratch_engine_hung(rdev, false);
4840
4841 return 0;
4842}
4843
4844/**
4845 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04004846 *
4847 * @rdev: radeon_device pointer
4848 * @ring: radeon_ring structure holding ring information
4849 *
4850 * Check if the 3D engine is locked up (CIK).
4851 * Returns true if the engine is locked, false if not.
4852 */
Alex Deuchercc066712013-04-09 12:59:51 -04004853bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04004854{
Alex Deuchercc066712013-04-09 12:59:51 -04004855 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04004856
Alex Deuchercc066712013-04-09 12:59:51 -04004857 if (!(reset_mask & (RADEON_RESET_GFX |
4858 RADEON_RESET_COMPUTE |
4859 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04004860 radeon_ring_lockup_update(ring);
4861 return false;
4862 }
4863 /* force CP activities */
4864 radeon_ring_force_activity(rdev, ring);
4865 return radeon_ring_test_lockup(rdev, ring);
4866}
4867
4868/**
Alex Deucher21a93e12013-04-09 12:47:11 -04004869 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4870 *
4871 * @rdev: radeon_device pointer
4872 * @ring: radeon_ring structure holding ring information
4873 *
4874 * Check if the async DMA engine is locked up (CIK).
4875 * Returns true if the engine appears to be locked up, false if not.
4876 */
4877bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4878{
Alex Deuchercc066712013-04-09 12:59:51 -04004879 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4880 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04004881
4882 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04004883 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04004884 else
Alex Deuchercc066712013-04-09 12:59:51 -04004885 mask = RADEON_RESET_DMA1;
4886
4887 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04004888 radeon_ring_lockup_update(ring);
4889 return false;
4890 }
4891 /* force ring activities */
4892 radeon_ring_force_activity(rdev, ring);
4893 return radeon_ring_test_lockup(rdev, ring);
4894}
4895
Alex Deucher1c491652013-04-09 12:45:26 -04004896/* MC */
4897/**
4898 * cik_mc_program - program the GPU memory controller
4899 *
4900 * @rdev: radeon_device pointer
4901 *
4902 * Set the location of vram, gart, and AGP in the GPU's
4903 * physical address space (CIK).
4904 */
4905static void cik_mc_program(struct radeon_device *rdev)
4906{
4907 struct evergreen_mc_save save;
4908 u32 tmp;
4909 int i, j;
4910
4911 /* Initialize HDP */
4912 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4913 WREG32((0x2c14 + j), 0x00000000);
4914 WREG32((0x2c18 + j), 0x00000000);
4915 WREG32((0x2c1c + j), 0x00000000);
4916 WREG32((0x2c20 + j), 0x00000000);
4917 WREG32((0x2c24 + j), 0x00000000);
4918 }
4919 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4920
4921 evergreen_mc_stop(rdev, &save);
4922 if (radeon_mc_wait_for_idle(rdev)) {
4923		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4924 }
4925	/* Lock out access through the VGA aperture */
4926 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4927 /* Update configuration */
4928 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4929 rdev->mc.vram_start >> 12);
4930 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4931 rdev->mc.vram_end >> 12);
4932 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4933 rdev->vram_scratch.gpu_addr >> 12);
4934 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4935 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4936 WREG32(MC_VM_FB_LOCATION, tmp);
4937 /* XXX double check these! */
4938 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4939 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4940 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4941 WREG32(MC_VM_AGP_BASE, 0);
4942 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4943 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4944 if (radeon_mc_wait_for_idle(rdev)) {
4945		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4946 }
4947 evergreen_mc_resume(rdev, &save);
4948 /* we need to own VRAM, so turn off the VGA renderer here
4949 * to stop it overwriting our objects */
4950 rv515_vga_render_disable(rdev);
4951}
4952
4953/**
4954 * cik_mc_init - initialize the memory controller driver params
4955 *
4956 * @rdev: radeon_device pointer
4957 *
4958 * Look up the amount of vram, vram width, and decide how to place
4959 * vram and gart within the GPU's physical address space (CIK).
4960 * Returns 0 for success.
4961 */
4962static int cik_mc_init(struct radeon_device *rdev)
4963{
4964 u32 tmp;
4965 int chansize, numchan;
4966
4967	/* Get VRAM information */
4968 rdev->mc.vram_is_ddr = true;
4969 tmp = RREG32(MC_ARB_RAMCFG);
4970 if (tmp & CHANSIZE_MASK) {
4971 chansize = 64;
4972 } else {
4973 chansize = 32;
4974 }
4975 tmp = RREG32(MC_SHARED_CHMAP);
4976 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4977 case 0:
4978 default:
4979 numchan = 1;
4980 break;
4981 case 1:
4982 numchan = 2;
4983 break;
4984 case 2:
4985 numchan = 4;
4986 break;
4987 case 3:
4988 numchan = 8;
4989 break;
4990 case 4:
4991 numchan = 3;
4992 break;
4993 case 5:
4994 numchan = 6;
4995 break;
4996 case 6:
4997 numchan = 10;
4998 break;
4999 case 7:
5000 numchan = 12;
5001 break;
5002 case 8:
5003 numchan = 16;
5004 break;
5005 }
5006 rdev->mc.vram_width = numchan * chansize;
5007	/* Could aperture size report 0? */
5008 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5009 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5010	/* size in MB on CIK */
5011 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
5012 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
5013 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5014 si_vram_gtt_location(rdev, &rdev->mc);
5015 radeon_update_bandwidth_info(rdev);
5016
5017 return 0;
5018}
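
/*
 * A compact restatement (hypothetical helper) of the NOOFCHAN decode in
 * cik_mc_init() above as a lookup table: field values 0-8 map to
 * 1/2/4/8/3/6/10/12/16 memory channels, and vram_width is then
 * channels * chansize (e.g. 8 channels x 64 bits = a 512-bit interface).
 */
static int __maybe_unused cik_example_numchan(u32 mc_shared_chmap)
{
	static const int chans[] = { 1, 2, 4, 8, 3, 6, 10, 12, 16 };
	u32 field = (mc_shared_chmap & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT;

	return field < ARRAY_SIZE(chans) ? chans[field] : 1;
}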
5019
5020/*
5021 * GART
5022 * VMID 0 is the physical GPU addresses as used by the kernel.
5023 * VMIDs 1-15 are used for userspace clients and are handled
5024 * by the radeon vm/hsa code.
5025 */
5026/**
5027 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5028 *
5029 * @rdev: radeon_device pointer
5030 *
5031 * Flush the TLB for the VMID 0 page table (CIK).
5032 */
5033void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5034{
5035 /* flush hdp cache */
5036 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5037
5038 /* bits 0-15 are the VM contexts0-15 */
5039 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5040}
5041
5042/**
5043 * cik_pcie_gart_enable - gart enable
5044 *
5045 * @rdev: radeon_device pointer
5046 *
5047 * This sets up the TLBs, programs the page tables for VMID0,
5048 * sets up the hw for VMIDs 1-15 which are allocated on
5049 * demand, and sets up the global locations for the LDS, GDS,
5050 * and GPUVM for FSA64 clients (CIK).
5051 * Returns 0 for success, errors for failure.
5052 */
5053static int cik_pcie_gart_enable(struct radeon_device *rdev)
5054{
5055 int r, i;
5056
5057 if (rdev->gart.robj == NULL) {
5058 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5059 return -EINVAL;
5060 }
5061 r = radeon_gart_table_vram_pin(rdev);
5062 if (r)
5063 return r;
5064 radeon_gart_restore(rdev);
5065 /* Setup TLB control */
5066 WREG32(MC_VM_MX_L1_TLB_CNTL,
5067 (0xA << 7) |
5068 ENABLE_L1_TLB |
5069 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5070 ENABLE_ADVANCED_DRIVER_MODEL |
5071 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5072 /* Setup L2 cache */
5073 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5074 ENABLE_L2_FRAGMENT_PROCESSING |
5075 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5076 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5077 EFFECTIVE_L2_QUEUE_SIZE(7) |
5078 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5079 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5080 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5081 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5082 /* setup context0 */
5083 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5084 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5085 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5086 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5087 (u32)(rdev->dummy_page.addr >> 12));
5088 WREG32(VM_CONTEXT0_CNTL2, 0);
5089 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5090 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5091
5092 WREG32(0x15D4, 0);
5093 WREG32(0x15D8, 0);
5094 WREG32(0x15DC, 0);
5095
5096 /* empty context1-15 */
5097 /* FIXME start with 4G, once using 2 level pt switch to full
5098 * vm size space
5099 */
5100 /* set vm size, must be a multiple of 4 */
5101 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5102 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5103 for (i = 1; i < 16; i++) {
5104 if (i < 8)
5105 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5106 rdev->gart.table_addr >> 12);
5107 else
5108 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5109 rdev->gart.table_addr >> 12);
5110 }
5111
5112 /* enable context1-15 */
5113 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5114 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04005115 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04005116 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04005117 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5118 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5119 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5120 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5121 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5122 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5123 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5124 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5125 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5126 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5127 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5128 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04005129
5130 /* TC cache setup ??? */
5131 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5132 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5133 WREG32(TC_CFG_L1_STORE_POLICY, 0);
5134
5135 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5136 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5137 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5138 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5139 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5140
5141 WREG32(TC_CFG_L1_VOLATILE, 0);
5142 WREG32(TC_CFG_L2_VOLATILE, 0);
5143
5144 if (rdev->family == CHIP_KAVERI) {
5145 u32 tmp = RREG32(CHUB_CONTROL);
5146 tmp &= ~BYPASS_VM;
5147 WREG32(CHUB_CONTROL, tmp);
5148 }
5149
5150 /* XXX SH_MEM regs */
5151 /* where to put LDS, scratch, GPUVM in FSA64 space */
Alex Deucherf61d5b462013-08-06 12:40:16 -04005152 mutex_lock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04005153 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05005154 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04005155 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04005156 WREG32(SH_MEM_CONFIG, 0);
5157 WREG32(SH_MEM_APE1_BASE, 1);
5158 WREG32(SH_MEM_APE1_LIMIT, 0);
5159 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04005160 /* SDMA GFX */
5161 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5162 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5163 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5164 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5165 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04005166 }
Alex Deucherb556b122013-01-29 10:44:22 -05005167 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04005168 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04005169
5170 cik_pcie_gart_tlb_flush(rdev);
5171 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5172 (unsigned)(rdev->mc.gtt_size >> 20),
5173 (unsigned long long)rdev->gart.table_addr);
5174 rdev->gart.ready = true;
5175 return 0;
5176}
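
/*
 * A sketch (hypothetical helper) of the split page-table-base register
 * map programmed in cik_pcie_gart_enable() above: VMIDs 0-7 index from
 * VM_CONTEXT0_PAGE_TABLE_BASE_ADDR and VMIDs 8-15 from
 * VM_CONTEXT8_PAGE_TABLE_BASE_ADDR, four bytes per VMID in each bank.
 */
static u32 __maybe_unused cik_example_vm_pt_base_reg(unsigned vmid)
{
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}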
5177
5178/**
5179 * cik_pcie_gart_disable - gart disable
5180 *
5181 * @rdev: radeon_device pointer
5182 *
5183 * This disables all VM page tables (CIK).
5184 */
5185static void cik_pcie_gart_disable(struct radeon_device *rdev)
5186{
5187 /* Disable all tables */
5188 WREG32(VM_CONTEXT0_CNTL, 0);
5189 WREG32(VM_CONTEXT1_CNTL, 0);
5190 /* Setup TLB control */
5191 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5192 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5193 /* Setup L2 cache */
5194 WREG32(VM_L2_CNTL,
5195 ENABLE_L2_FRAGMENT_PROCESSING |
5196 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5197 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5198 EFFECTIVE_L2_QUEUE_SIZE(7) |
5199 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5200 WREG32(VM_L2_CNTL2, 0);
5201 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5202 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5203 radeon_gart_table_vram_unpin(rdev);
5204}
5205
5206/**
5207 * cik_pcie_gart_fini - vm fini callback
5208 *
5209 * @rdev: radeon_device pointer
5210 *
5211 * Tears down the driver GART/VM setup (CIK).
5212 */
5213static void cik_pcie_gart_fini(struct radeon_device *rdev)
5214{
5215 cik_pcie_gart_disable(rdev);
5216 radeon_gart_table_vram_free(rdev);
5217 radeon_gart_fini(rdev);
5218}
5219
5220/* vm parser */
5221/**
5222 * cik_ib_parse - vm ib_parse callback
5223 *
5224 * @rdev: radeon_device pointer
5225 * @ib: indirect buffer pointer
5226 *
5227 * CIK uses hw IB checking so this is a nop (CIK).
5228 */
5229int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5230{
5231 return 0;
5232}
5233
5234/*
5235 * vm
5236 * VMID 0 is the physical GPU addresses as used by the kernel.
5237 * VMIDs 1-15 are used for userspace clients and are handled
5238 * by the radeon vm/hsa code.
5239 */
5240/**
5241 * cik_vm_init - cik vm init callback
5242 *
5243 * @rdev: radeon_device pointer
5244 *
5245 * Inits cik specific vm parameters (number of VMs, base of vram for
5246 * VMIDs 1-15) (CIK).
5247 * Returns 0 for success.
5248 */
5249int cik_vm_init(struct radeon_device *rdev)
5250{
5251 /* number of VMs */
5252 rdev->vm_manager.nvm = 16;
5253 /* base offset of vram pages */
5254 if (rdev->flags & RADEON_IS_IGP) {
5255 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5256 tmp <<= 22;
5257 rdev->vm_manager.vram_base_offset = tmp;
5258 } else
5259 rdev->vm_manager.vram_base_offset = 0;
5260
5261 return 0;
5262}
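
/* A worked reading of the IGP path above (an interpretation, not new
 * behavior): the << 22 converts MC_VM_FB_OFFSET from 4 MB units into a
 * byte address - assuming that is indeed the register's granularity, as
 * the shift suggests - so a register value of 0x300 would place the VM
 * vram base at 0x300 << 22 = 3 GB.
 */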

/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}

/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block = (char *)&mc_client;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
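
/* The fault's MC client register holds a four-character ASCII tag, which
 * is why the cast above can feed it to %s: on a little-endian host an
 * illustrative value of 0x31414D44 reads back as the bytes "DMA1" (made-up
 * value, not a real client ID). Note the u32 itself is not NUL-terminated,
 * so the print relies on a zero byte following it in memory.
 */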

/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring emitting the flush
 * @vm: VM to flush, or NULL to do nothing
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * context...
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
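
/* Anatomy of the WRITE_DATA packets emitted above (a reading of the
 * stream, not new code): PACKET3(PACKET3_WRITE_DATA, 3) is followed by
 * four dwords - control, destination address low (the register offset in
 * dwords), destination address high, and one data dword - so a single
 * register write costs five dwords on the ring. The SH_MEM_* burst uses
 * a count of 6 to carry four consecutive data dwords instead of one.
 */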

/**
 * cik_vm_set_page - update the page tables using CP or sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK).
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = ndw;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count;
				if (ndw > 0x7FFFF)
					ndw = 0x7FFFF;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
				pe += ndw * 8;
				addr += ndw * incr;
				count -= ndw;
			}
		}
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
	}
}
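
/* Dword accounting in the CP path above, as a worked example (numbers
 * only, no new behavior): for count = 3 page entries, ndw = 2 + 3 * 2 = 8,
 * i.e. two dwords of destination address plus two dwords per 64-bit PTE;
 * the 0x3FFE clamp keeps the packet within the CP's 14-bit count field.
 * The trailing NOP loop in the sDMA path pads the IB to a multiple of 8
 * dwords, which the sDMA engine appears to require.
 */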

/**
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the sDMA ring emitting the flush
 * @vm: VM to flush, or NULL to do nothing
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (vm == NULL)
		return;

	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm->id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */

	/* flush TLB */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
}

/*
 * RLC
 * The RLC is a multi-purpose microengine that handles a
 * variety of functions, the most important of which is
 * the interrupt controller.
 */
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
}

static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 tmp;

	tmp = RREG32(RLC_LB_CNTL);
	if (enable)
		tmp |= LOAD_BALANCE_ENABLE;
	else
		tmp &= ~LOAD_BALANCE_ENABLE;
	WREG32(RLC_LB_CNTL, tmp);
}

static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
{
	u32 tmp;

	tmp = RREG32(RLC_CNTL);
	if (tmp != rlc)
		WREG32(RLC_CNTL, rlc);
}

static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}

void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}

void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);
}
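
/* Typical pairing (a usage sketch; callers elsewhere in the driver follow
 * this shape when touching gating-sensitive state):
 *
 *	cik_enter_rlc_safe_mode(rdev);
 *	... reprogram clock/power-gating registers ...
 *	cik_exit_rlc_safe_mode(rdev);
 *
 * Enter blocks until the RLC acknowledges the request (REQ clears), so
 * the register updates in between do not race the RLC.
 */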

/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}

/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}

/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		size = BONAIRE_RLC_UCODE_SIZE;
		break;
	case CHIP_KAVERI:
		size = KV_RLC_UCODE_SIZE;
		break;
	case CHIP_KABINI:
		size = KB_RLC_UCODE_SIZE;
		break;
	}

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
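
/* The ucode upload above uses the RLC's indexed data port: writing
 * RLC_GPM_UCODE_ADDR once to 0 and then streaming dwords into
 * RLC_GPM_UCODE_DATA relies on the address auto-incrementing on each data
 * write (an inference from the loop shape - no explicit address update
 * appears inside it; the trailing ADDR = 0 just resets the index). The
 * firmware image is big-endian, hence the be32_to_cpup() on each dword.
 */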

static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	cik_enable_gui_idle_interrupt(rdev, enable);

	if (enable) {
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}

static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable) {
		orig = data = RREG32(CP_MEM_SLP_CNTL);
		data |= CP_MEM_LS_EN;
		if (orig != data)
			WREG32(CP_MEM_SLP_CNTL, data);

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data &= ~SM_MODE_MASK;
		data |= SM_MODE(0x2);
		data |= SM_MODE_ENABLE;
		data &= ~CGTS_OVERRIDE;
		data &= ~CGTS_LS_OVERRIDE;
		data &= ~ON_MONITOR_ADD_MASK;
		data |= ON_MONITOR_ADD_EN;
		data |= ON_MONITOR_ADD(0x96);
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}

static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};

static void cik_enable_mc_ls(struct radeon_device *rdev,
			     bool enable)
{
	int i;
	u32 orig, data;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (enable)
			data |= MC_LS_ENABLE;
		else
			data &= ~MC_LS_ENABLE;
		if (data != orig)
			WREG32(mc_cg_registers[i], data);
	}
}

static void cik_enable_mc_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	int i;
	u32 orig, data;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (enable)
			data |= MC_CG_ENABLE;
		else
			data &= ~MC_CG_ENABLE;
		if (data != orig)
			WREG32(mc_cg_registers[i], data);
	}
}

static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable) {
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}

static void cik_enable_sdma_mgls(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable) {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data |= 0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	} else {
		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
		data &= ~0x100;
		if (orig != data)
			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
	}
}

static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}

static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	orig = data = RREG32(HDP_HOST_PATH_CNTL);

	if (enable)
		data &= ~CLOCK_GATING_DIS;
	else
		data |= CLOCK_GATING_DIS;

	if (orig != data)
		WREG32(HDP_HOST_PATH_CNTL, data);
}

static void cik_enable_hdp_ls(struct radeon_device *rdev,
			      bool enable)
{
	u32 orig, data;

	orig = data = RREG32(HDP_MEM_POWER_LS);

	if (enable)
		data |= HDP_LS_ENABLE;
	else
		data &= ~HDP_LS_ENABLE;

	if (orig != data)
		WREG32(HDP_MEM_POWER_LS, data);
}

void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
	}

	if (block & RADEON_CG_BLOCK_MC) {
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}

static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}

static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
					  bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
	else
		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
					  bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
	else
		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data &= ~DISABLE_CP_PG;
	else
		data |= DISABLE_CP_PG;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data &= ~DISABLE_GDS_PG;
	else
		data |= DISABLE_GDS_PG;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

#define CP_ME_TABLE_SIZE 96
#define CP_ME_TABLE_OFFSET 2048
#define CP_MEC_TABLE_OFFSET 4096

void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}

static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		data = RREG32(DB_RENDER_CONTROL);
	}
}

static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}
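
/* Worked example for the bitmap math above (illustrative numbers): after
 * the shift, the low bits of tmp hold a disabled-CU mask. With
 * max_cu_per_sh = 8 the loop builds mask = 0xff, so a raw value of
 * tmp = 0x3 (CUs 0 and 1 fused off) yields (~0x3) & 0xff = 0xfc,
 * i.e. CUs 2-7 active.
 */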

static void cik_init_ao_cu_mask(struct radeon_device *rdev)
{
	u32 i, j, k, active_cu_number = 0;
	u32 mask, counter, cu_bitmap;
	u32 tmp = 0;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
					if (counter < 2)
						cu_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}

			active_cu_number += counter;
			tmp |= (cu_bitmap << (i * 16 + j * 8));
		}
	}

	WREG32(RLC_PG_AO_CU_MASK, tmp);

	tmp = RREG32(RLC_MAX_PG_CU);
	tmp &= ~MAX_PU_CU_MASK;
	tmp |= MAX_PU_CU(active_cu_number);
	WREG32(RLC_MAX_PG_CU, tmp);
}

static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
				       bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data |= STATIC_PER_CU_PG_ENABLE;
	else
		data &= ~STATIC_PER_CU_PG_ENABLE;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
					bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable)
		data |= DYN_PER_CU_PG_ENABLE;
	else
		data &= ~DYN_PER_CU_PG_ENABLE;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}

#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D

static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}

static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	bool has_pg = false;
	bool has_dyn_mgpg = false;
	bool has_static_mgpg = false;

	/* only APUs have PG */
	if (rdev->flags & RADEON_IS_IGP) {
		has_pg = true;
		has_static_mgpg = true;
		if (rdev->family == CHIP_KAVERI)
			has_dyn_mgpg = true;
	}

	if (has_pg) {
		cik_enable_gfx_cgpg(rdev, enable);
		if (enable) {
			cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
			cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
		} else {
			cik_enable_gfx_static_mgpg(rdev, false);
			cik_enable_gfx_dynamic_mgpg(rdev, false);
		}
	}

}

void cik_init_pg(struct radeon_device *rdev)
{
	bool has_pg = false;

	/* only APUs have PG */
	if (rdev->flags & RADEON_IS_IGP) {
		/* XXX disable this for now */
		/* has_pg = true; */
	}

	if (has_pg) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		cik_init_gfx_cgpg(rdev);
		cik_enable_cp_pg(rdev, true);
		cik_enable_gds_pg(rdev, true);
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}

/*
 * Interrupts
 * Starting with r6xx, interrupts are handled via a ring buffer.
 * Ring buffers are areas of GPU accessible memory that the GPU
 * writes interrupt vectors into and the host reads vectors out of.
 * There is a rptr (read pointer) that determines where the
 * host is currently reading, and a wptr (write pointer)
 * which determines where the GPU has written. When the
 * pointers are equal, the ring is idle. When the GPU
 * writes vectors to the ring buffer, it increments the
 * wptr. When there is an interrupt, the host then starts
 * fetching vectors and processing them until the pointers are
 * equal again, at which point it updates the rptr.
 */
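
/* Host-side consumption, in sketch form (this is the shape of
 * cik_irq_process() below, not additional driver code):
 *
 *	wptr = cik_get_ih_wptr(rdev);
 *	rptr = rdev->ih.rptr;
 *	while (rptr != wptr) {
 *		... decode the 16-byte vector at rptr ...
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	rdev->ih.rptr = rptr;
 *	WREG32(IH_RB_RPTR, rptr);
 */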
6412
6413/**
6414 * cik_enable_interrupts - Enable the interrupt ring buffer
6415 *
6416 * @rdev: radeon_device pointer
6417 *
6418 * Enable the interrupt ring buffer (CIK).
6419 */
6420static void cik_enable_interrupts(struct radeon_device *rdev)
6421{
6422 u32 ih_cntl = RREG32(IH_CNTL);
6423 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6424
6425 ih_cntl |= ENABLE_INTR;
6426 ih_rb_cntl |= IH_RB_ENABLE;
6427 WREG32(IH_CNTL, ih_cntl);
6428 WREG32(IH_RB_CNTL, ih_rb_cntl);
6429 rdev->ih.enabled = true;
6430}
6431
6432/**
6433 * cik_disable_interrupts - Disable the interrupt ring buffer
6434 *
6435 * @rdev: radeon_device pointer
6436 *
6437 * Disable the interrupt ring buffer (CIK).
6438 */
6439static void cik_disable_interrupts(struct radeon_device *rdev)
6440{
6441 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6442 u32 ih_cntl = RREG32(IH_CNTL);
6443
6444 ih_rb_cntl &= ~IH_RB_ENABLE;
6445 ih_cntl &= ~ENABLE_INTR;
6446 WREG32(IH_RB_CNTL, ih_rb_cntl);
6447 WREG32(IH_CNTL, ih_cntl);
6448 /* set rptr, wptr to 0 */
6449 WREG32(IH_RB_RPTR, 0);
6450 WREG32(IH_RB_WPTR, 0);
6451 rdev->ih.enabled = false;
6452 rdev->ih.rptr = 0;
6453}
6454
6455/**
6456 * cik_disable_interrupt_state - Disable all interrupt sources
6457 *
6458 * @rdev: radeon_device pointer
6459 *
6460 * Clear all interrupt enable bits used by the driver (CIK).
6461 */
6462static void cik_disable_interrupt_state(struct radeon_device *rdev)
6463{
6464 u32 tmp;
6465
6466 /* gfx ring */
6467 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04006468 /* sdma */
6469 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6470 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6471 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6472 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05006473 /* compute queues */
6474 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6475 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6476 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6477 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6478 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6479 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6480 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6481 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6482 /* grbm */
6483 WREG32(GRBM_INT_CNTL, 0);
6484 /* vline/vblank, etc. */
6485 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6486 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6487 if (rdev->num_crtc >= 4) {
6488 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6489 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6490 }
6491 if (rdev->num_crtc >= 6) {
6492 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6493 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6494 }
6495
6496 /* dac hotplug */
6497 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6498
6499 /* digital hotplug */
6500 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6501 WREG32(DC_HPD1_INT_CONTROL, tmp);
6502 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6503 WREG32(DC_HPD2_INT_CONTROL, tmp);
6504 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6505 WREG32(DC_HPD3_INT_CONTROL, tmp);
6506 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6507 WREG32(DC_HPD4_INT_CONTROL, tmp);
6508 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6509 WREG32(DC_HPD5_INT_CONTROL, tmp);
6510 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6511 WREG32(DC_HPD6_INT_CONTROL, tmp);
6512
6513}
6514
6515/**
6516 * cik_irq_init - init and enable the interrupt ring
6517 *
6518 * @rdev: radeon_device pointer
6519 *
6520 * Allocate a ring buffer for the interrupt controller,
6521 * enable the RLC, disable interrupts, enable the IH
6522 * ring buffer and enable it (CIK).
6523 * Called at device load and reume.
6524 * Returns 0 for success, errors for failure.
6525 */
6526static int cik_irq_init(struct radeon_device *rdev)
6527{
6528 int ret = 0;
6529 int rb_bufsz;
6530 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6531
6532 /* allocate ring */
6533 ret = r600_ih_ring_alloc(rdev);
6534 if (ret)
6535 return ret;
6536
6537 /* disable irqs */
6538 cik_disable_interrupts(rdev);
6539
6540 /* init rlc */
6541 ret = cik_rlc_resume(rdev);
6542 if (ret) {
6543 r600_ih_ring_fini(rdev);
6544 return ret;
6545 }
6546
6547 /* setup interrupt control */
6548 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6549 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6550 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6551 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6552 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6553 */
6554 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6555 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6556 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6557 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6558
6559 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6560 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
6561
6562 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6563 IH_WPTR_OVERFLOW_CLEAR |
6564 (rb_bufsz << 1));
6565
6566 if (rdev->wb.enabled)
6567 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6568
6569 /* set the writeback address whether it's enabled or not */
6570 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6571 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6572
6573 WREG32(IH_RB_CNTL, ih_rb_cntl);
6574
6575 /* set rptr, wptr to 0 */
6576 WREG32(IH_RB_RPTR, 0);
6577 WREG32(IH_RB_WPTR, 0);
6578
6579 /* Default settings for IH_CNTL (disabled at first) */
6580 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6581 /* RPTR_REARM only works if msi's are enabled */
6582 if (rdev->msi_enabled)
6583 ih_cntl |= RPTR_REARM;
6584 WREG32(IH_CNTL, ih_cntl);
6585
6586 /* force the active interrupt state to all disabled */
6587 cik_disable_interrupt_state(rdev);
6588
6589 pci_set_master(rdev->pdev);
6590
6591 /* enable irqs */
6592 cik_enable_interrupts(rdev);
6593
6594 return ret;
6595}
6596
6597/**
6598 * cik_irq_set - enable/disable interrupt sources
6599 *
6600 * @rdev: radeon_device pointer
6601 *
6602 * Enable interrupt sources on the GPU (vblanks, hpd,
6603 * etc.) (CIK).
6604 * Returns 0 for success, errors for failure.
6605 */
6606int cik_irq_set(struct radeon_device *rdev)
6607{
6608 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6609 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04006610 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6611 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05006612 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6613 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6614 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04006615 u32 dma_cntl, dma_cntl1;
Alex Deucher41a524a2013-08-14 01:01:40 -04006616 u32 thermal_int;
Alex Deuchera59781b2012-11-09 10:45:57 -05006617
6618 if (!rdev->irq.installed) {
6619 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6620 return -EINVAL;
6621 }
6622 /* don't enable anything if the ih is disabled */
6623 if (!rdev->ih.enabled) {
6624 cik_disable_interrupts(rdev);
6625 /* force the active interrupt state to all disabled */
6626 cik_disable_interrupt_state(rdev);
6627 return 0;
6628 }
6629
6630 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6631 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6632 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6633 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6634 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6635 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6636
Alex Deucher21a93e12013-04-09 12:47:11 -04006637 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6638 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6639
Alex Deucher2b0781a2013-04-09 14:26:16 -04006640 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6641 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6642 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6643 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6644 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6645 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6646 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6647 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6648
Alex Deuchercc8dbbb2013-08-14 01:03:41 -04006649 if (rdev->flags & RADEON_IS_IGP)
6650 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6651 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6652 else
6653 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6654 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
Alex Deucher41a524a2013-08-14 01:01:40 -04006655
Alex Deuchera59781b2012-11-09 10:45:57 -05006656 /* enable CP interrupts on all rings */
6657 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6658 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6659 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6660 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04006661 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6662 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6663 DRM_DEBUG("si_irq_set: sw int cp1\n");
6664 if (ring->me == 1) {
6665 switch (ring->pipe) {
6666 case 0:
6667 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6668 break;
6669 case 1:
6670 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6671 break;
6672 case 2:
6673 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6674 break;
6675 case 3:
6676 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6677 break;
6678 default:
6679 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6680 break;
6681 }
6682 } else if (ring->me == 2) {
6683 switch (ring->pipe) {
6684 case 0:
6685 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6686 break;
6687 case 1:
6688 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6689 break;
6690 case 2:
6691 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6692 break;
6693 case 3:
6694 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6695 break;
6696 default:
6697 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6698 break;
6699 }
6700 } else {
6701 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6702 }
6703 }
6704 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6705 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6706 DRM_DEBUG("si_irq_set: sw int cp2\n");
6707 if (ring->me == 1) {
6708 switch (ring->pipe) {
6709 case 0:
6710 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6711 break;
6712 case 1:
6713 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6714 break;
6715 case 2:
6716 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6717 break;
6718 case 3:
6719 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6720 break;
6721 default:
6722 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6723 break;
6724 }
6725 } else if (ring->me == 2) {
6726 switch (ring->pipe) {
6727 case 0:
6728 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6729 break;
6730 case 1:
6731 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6732 break;
6733 case 2:
6734 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6735 break;
6736 case 3:
6737 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6738 break;
6739 default:
6740 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6741 break;
6742 }
6743 } else {
6744 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6745 }
6746 }
Alex Deuchera59781b2012-11-09 10:45:57 -05006747
Alex Deucher21a93e12013-04-09 12:47:11 -04006748 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6749 DRM_DEBUG("cik_irq_set: sw int dma\n");
6750 dma_cntl |= TRAP_ENABLE;
6751 }
6752
6753 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6754 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6755 dma_cntl1 |= TRAP_ENABLE;
6756 }
6757
Alex Deuchera59781b2012-11-09 10:45:57 -05006758 if (rdev->irq.crtc_vblank_int[0] ||
6759 atomic_read(&rdev->irq.pflip[0])) {
6760 DRM_DEBUG("cik_irq_set: vblank 0\n");
6761 crtc1 |= VBLANK_INTERRUPT_MASK;
6762 }
6763 if (rdev->irq.crtc_vblank_int[1] ||
6764 atomic_read(&rdev->irq.pflip[1])) {
6765 DRM_DEBUG("cik_irq_set: vblank 1\n");
6766 crtc2 |= VBLANK_INTERRUPT_MASK;
6767 }
6768 if (rdev->irq.crtc_vblank_int[2] ||
6769 atomic_read(&rdev->irq.pflip[2])) {
6770 DRM_DEBUG("cik_irq_set: vblank 2\n");
6771 crtc3 |= VBLANK_INTERRUPT_MASK;
6772 }
6773 if (rdev->irq.crtc_vblank_int[3] ||
6774 atomic_read(&rdev->irq.pflip[3])) {
6775 DRM_DEBUG("cik_irq_set: vblank 3\n");
6776 crtc4 |= VBLANK_INTERRUPT_MASK;
6777 }
6778 if (rdev->irq.crtc_vblank_int[4] ||
6779 atomic_read(&rdev->irq.pflip[4])) {
6780 DRM_DEBUG("cik_irq_set: vblank 4\n");
6781 crtc5 |= VBLANK_INTERRUPT_MASK;
6782 }
6783 if (rdev->irq.crtc_vblank_int[5] ||
6784 atomic_read(&rdev->irq.pflip[5])) {
6785 DRM_DEBUG("cik_irq_set: vblank 5\n");
6786 crtc6 |= VBLANK_INTERRUPT_MASK;
6787 }
6788 if (rdev->irq.hpd[0]) {
6789 DRM_DEBUG("cik_irq_set: hpd 1\n");
6790 hpd1 |= DC_HPDx_INT_EN;
6791 }
6792 if (rdev->irq.hpd[1]) {
6793 DRM_DEBUG("cik_irq_set: hpd 2\n");
6794 hpd2 |= DC_HPDx_INT_EN;
6795 }
6796 if (rdev->irq.hpd[2]) {
6797 DRM_DEBUG("cik_irq_set: hpd 3\n");
6798 hpd3 |= DC_HPDx_INT_EN;
6799 }
6800 if (rdev->irq.hpd[3]) {
6801 DRM_DEBUG("cik_irq_set: hpd 4\n");
6802 hpd4 |= DC_HPDx_INT_EN;
6803 }
6804 if (rdev->irq.hpd[4]) {
6805 DRM_DEBUG("cik_irq_set: hpd 5\n");
6806 hpd5 |= DC_HPDx_INT_EN;
6807 }
6808 if (rdev->irq.hpd[5]) {
6809 DRM_DEBUG("cik_irq_set: hpd 6\n");
6810 hpd6 |= DC_HPDx_INT_EN;
6811 }
6812
Alex Deucher41a524a2013-08-14 01:01:40 -04006813 if (rdev->irq.dpm_thermal) {
6814 DRM_DEBUG("dpm thermal\n");
Alex Deuchercc8dbbb2013-08-14 01:03:41 -04006815 if (rdev->flags & RADEON_IS_IGP)
6816 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6817 else
6818 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
Alex Deucher41a524a2013-08-14 01:01:40 -04006819 }
6820
Alex Deuchera59781b2012-11-09 10:45:57 -05006821 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6822
Alex Deucher21a93e12013-04-09 12:47:11 -04006823 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6824 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6825
Alex Deucher2b0781a2013-04-09 14:26:16 -04006826 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6827 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6828 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6829 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6830 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6831 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6832 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6833 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6834
Alex Deuchera59781b2012-11-09 10:45:57 -05006835 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6836
6837 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6838 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6839 if (rdev->num_crtc >= 4) {
6840 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6841 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6842 }
6843 if (rdev->num_crtc >= 6) {
6844 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6845 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6846 }
6847
6848 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6849 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6850 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6851 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6852 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6853 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6854
Alex Deuchercc8dbbb2013-08-14 01:03:41 -04006855 if (rdev->flags & RADEON_IS_IGP)
6856 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6857 else
6858 WREG32_SMC(CG_THERMAL_INT, thermal_int);
Alex Deucher41a524a2013-08-14 01:01:40 -04006859
Alex Deuchera59781b2012-11-09 10:45:57 -05006860 return 0;
6861}
6862
6863/**
6864 * cik_irq_ack - ack interrupt sources
6865 *
6866 * @rdev: radeon_device pointer
6867 *
6868 * Ack interrupt sources on the GPU (vblanks, hpd,
6869 * etc.) (CIK). Certain interrupts sources are sw
6870 * generated and do not require an explicit ack.
6871 */
6872static inline void cik_irq_ack(struct radeon_device *rdev)
6873{
6874 u32 tmp;
6875
6876 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6877 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6878 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6879 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6880 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6881 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6882 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6883
6884 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6885 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6886 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6887 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6888 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6889 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6890 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6891 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6892
6893 if (rdev->num_crtc >= 4) {
6894 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6895 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6896 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6897 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6898 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6899 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6900 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6901 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6902 }
6903
6904 if (rdev->num_crtc >= 6) {
6905 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6906 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6907 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6908 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6909 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6910 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6911 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6912 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6913 }
6914
6915 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6916 tmp = RREG32(DC_HPD1_INT_CONTROL);
6917 tmp |= DC_HPDx_INT_ACK;
6918 WREG32(DC_HPD1_INT_CONTROL, tmp);
6919 }
6920 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6921 tmp = RREG32(DC_HPD2_INT_CONTROL);
6922 tmp |= DC_HPDx_INT_ACK;
6923 WREG32(DC_HPD2_INT_CONTROL, tmp);
6924 }
6925 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6926 tmp = RREG32(DC_HPD3_INT_CONTROL);
6927 tmp |= DC_HPDx_INT_ACK;
6928 WREG32(DC_HPD3_INT_CONTROL, tmp);
6929 }
6930 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6931 tmp = RREG32(DC_HPD4_INT_CONTROL);
6932 tmp |= DC_HPDx_INT_ACK;
6933 WREG32(DC_HPD4_INT_CONTROL, tmp);
6934 }
6935 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6936 tmp = RREG32(DC_HPD5_INT_CONTROL);
6937 tmp |= DC_HPDx_INT_ACK;
6938 WREG32(DC_HPD5_INT_CONTROL, tmp);
6939 }
6940 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6941 tmp = RREG32(DC_HPD5_INT_CONTROL);
6942 tmp |= DC_HPDx_INT_ACK;
6943 WREG32(DC_HPD6_INT_CONTROL, tmp);
6944 }
6945}
6946
6947/**
6948 * cik_irq_disable - disable interrupts
6949 *
6950 * @rdev: radeon_device pointer
6951 *
6952 * Disable interrupts on the hw (CIK).
6953 */
6954static void cik_irq_disable(struct radeon_device *rdev)
6955{
6956 cik_disable_interrupts(rdev);
6957 /* Wait and acknowledge irq */
6958 mdelay(1);
6959 cik_irq_ack(rdev);
6960 cik_disable_interrupt_state(rdev);
6961}
6962
6963/**
6964 * cik_irq_disable - disable interrupts for suspend
6965 *
6966 * @rdev: radeon_device pointer
6967 *
6968 * Disable interrupts and stop the RLC (CIK).
6969 * Used for suspend.
6970 */
6971static void cik_irq_suspend(struct radeon_device *rdev)
6972{
6973 cik_irq_disable(rdev);
6974 cik_rlc_stop(rdev);
6975}
6976
6977/**
6978 * cik_irq_fini - tear down interrupt support
6979 *
6980 * @rdev: radeon_device pointer
6981 *
6982 * Disable interrupts on the hw and free the IH ring
6983 * buffer (CIK).
6984 * Used for driver unload.
6985 */
6986static void cik_irq_fini(struct radeon_device *rdev)
6987{
6988 cik_irq_suspend(rdev);
6989 r600_ih_ring_fini(rdev);
6990}
6991
6992/**
6993 * cik_get_ih_wptr - get the IH ring buffer wptr
6994 *
6995 * @rdev: radeon_device pointer
6996 *
6997 * Get the IH ring buffer wptr from either the register
6998 * or the writeback memory buffer (CIK). Also check for
6999 * ring buffer overflow and deal with it.
7000 * Used by cik_irq_process().
7001 * Returns the value of the wptr.
7002 */
7003static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7004{
7005 u32 wptr, tmp;
7006
7007 if (rdev->wb.enabled)
7008 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7009 else
7010 wptr = RREG32(IH_RB_WPTR);
7011
7012 if (wptr & RB_OVERFLOW) {
7013 /* When a ring buffer overflow happens, start parsing interrupts
7014 * from the last not-overwritten vector (wptr + 16). Hopefully
7015 * this should allow us to catch up.
7016 */
7017 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7018 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7019 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7020 tmp = RREG32(IH_RB_CNTL);
7021 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7022 WREG32(IH_RB_CNTL, tmp);
7023 }
7024 return (wptr & rdev->ih.ptr_mask);
7025}
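
/* Illustrative arithmetic (not driver code): with ptr_mask = 0xffff (the
 * 64 KB ring allocated in cik_init()) and an overflowed hardware wptr of
 * 0x10020, the code above resumes at rptr = (0x10020 + 16) & 0xffff =
 * 0x0030, one 16-byte vector past the oldest slot that may have been
 * overwritten, and the function returns 0x10020 & 0xffff = 0x0020 as the
 * wptr to drain to.
 */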
7026
7027/* CIK IV Ring
7028 * Each IV ring entry is 128 bits:
7029 * [7:0] - interrupt source id
7030 * [31:8] - reserved
7031 * [59:32] - interrupt source data
7032 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04007033 * [71:64] - RINGID
7034 * CP:
7035 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05007036 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7037 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7038 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7039 * PIPE_ID - ME0 0=3D
7040 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04007041 * SDMA:
7042 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7043 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7044 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05007045 * [79:72] - VMID
7046 * [95:80] - PASID
7047 * [127:96] - reserved
7048 */
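
/* Illustrative sketch (not driver code) of decoding one IV ring entry per
 * the layout above. The function name is hypothetical, and the VMID/PASID
 * extraction is an assumption based solely on the bit positions documented
 * in this comment; cik_irq_process() below only decodes the first three
 * fields.
 */
static inline void cik_iv_entry_decode_example(const u32 *ring, u32 ring_index)
{
	u32 dw2      = le32_to_cpu(ring[ring_index + 2]);
	u32 src_id   = le32_to_cpu(ring[ring_index + 0]) & 0xff;      /* [7:0] */
	u32 src_data = le32_to_cpu(ring[ring_index + 1]) & 0xfffffff; /* [59:32] */
	u32 ring_id  = dw2 & 0xff;                                    /* [71:64] */
	u32 vmid     = (dw2 >> 8) & 0xff;                             /* [79:72] */
	u32 pasid    = (dw2 >> 16) & 0xffff;                          /* [95:80] */

	DRM_DEBUG("IV: src %u data %u ring %u vm %u pasid %u\n",
		  src_id, src_data, ring_id, vmid, pasid);
}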
7049/**
7050 * cik_irq_process - interrupt handler
7051 *
7052 * @rdev: radeon_device pointer
7053 *
7054 * Interrupt handler (CIK). Walk the IH ring,
7055 * ack interrupts and schedule work to handle
7056 * interrupt events.
7057 * Returns irq process return code.
7058 */
7059int cik_irq_process(struct radeon_device *rdev)
7060{
Alex Deucher2b0781a2013-04-09 14:26:16 -04007061 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7062 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05007063 u32 wptr;
7064 u32 rptr;
7065 u32 src_id, src_data, ring_id;
7066 u8 me_id, pipe_id, queue_id;
7067 u32 ring_index;
7068 bool queue_hotplug = false;
7069 bool queue_reset = false;
Alex Deucher3ec7d112013-06-14 10:42:22 -04007070 u32 addr, status, mc_client;
Alex Deucher41a524a2013-08-14 01:01:40 -04007071 bool queue_thermal = false;
Alex Deuchera59781b2012-11-09 10:45:57 -05007072
7073 if (!rdev->ih.enabled || rdev->shutdown)
7074 return IRQ_NONE;
7075
7076 wptr = cik_get_ih_wptr(rdev);
7077
7078restart_ih:
7079 /* is somebody else already processing irqs? */
7080 if (atomic_xchg(&rdev->ih.lock, 1))
7081 return IRQ_NONE;
7082
7083 rptr = rdev->ih.rptr;
7084 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7085
7086 /* Order reading of wptr vs. reading of IH ring data */
7087 rmb();
7088
7089 /* display interrupts */
7090 cik_irq_ack(rdev);
7091
7092 while (rptr != wptr) {
7093 /* wptr/rptr are in bytes! */
7094 ring_index = rptr / 4;
7095 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7096 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7097 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05007098
7099 switch (src_id) {
7100 case 1: /* D1 vblank/vline */
7101 switch (src_data) {
7102 case 0: /* D1 vblank */
7103 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7104 if (rdev->irq.crtc_vblank_int[0]) {
7105 drm_handle_vblank(rdev->ddev, 0);
7106 rdev->pm.vblank_sync = true;
7107 wake_up(&rdev->irq.vblank_queue);
7108 }
7109 if (atomic_read(&rdev->irq.pflip[0]))
7110 radeon_crtc_handle_flip(rdev, 0);
7111 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7112 DRM_DEBUG("IH: D1 vblank\n");
7113 }
7114 break;
7115 case 1: /* D1 vline */
7116 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7117 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7118 DRM_DEBUG("IH: D1 vline\n");
7119 }
7120 break;
7121 default:
7122 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7123 break;
7124 }
7125 break;
7126 case 2: /* D2 vblank/vline */
7127 switch (src_data) {
7128 case 0: /* D2 vblank */
7129 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7130 if (rdev->irq.crtc_vblank_int[1]) {
7131 drm_handle_vblank(rdev->ddev, 1);
7132 rdev->pm.vblank_sync = true;
7133 wake_up(&rdev->irq.vblank_queue);
7134 }
7135 if (atomic_read(&rdev->irq.pflip[1]))
7136 radeon_crtc_handle_flip(rdev, 1);
7137 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7138 DRM_DEBUG("IH: D2 vblank\n");
7139 }
7140 break;
7141 case 1: /* D2 vline */
7142 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7143 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7144 DRM_DEBUG("IH: D2 vline\n");
7145 }
7146 break;
7147 default:
7148 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7149 break;
7150 }
7151 break;
7152 case 3: /* D3 vblank/vline */
7153 switch (src_data) {
7154 case 0: /* D3 vblank */
7155 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7156 if (rdev->irq.crtc_vblank_int[2]) {
7157 drm_handle_vblank(rdev->ddev, 2);
7158 rdev->pm.vblank_sync = true;
7159 wake_up(&rdev->irq.vblank_queue);
7160 }
7161 if (atomic_read(&rdev->irq.pflip[2]))
7162 radeon_crtc_handle_flip(rdev, 2);
7163 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7164 DRM_DEBUG("IH: D3 vblank\n");
7165 }
7166 break;
7167 case 1: /* D3 vline */
7168 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7169 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7170 DRM_DEBUG("IH: D3 vline\n");
7171 }
7172 break;
7173 default:
7174 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7175 break;
7176 }
7177 break;
7178 case 4: /* D4 vblank/vline */
7179 switch (src_data) {
7180 case 0: /* D4 vblank */
7181 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7182 if (rdev->irq.crtc_vblank_int[3]) {
7183 drm_handle_vblank(rdev->ddev, 3);
7184 rdev->pm.vblank_sync = true;
7185 wake_up(&rdev->irq.vblank_queue);
7186 }
7187 if (atomic_read(&rdev->irq.pflip[3]))
7188 radeon_crtc_handle_flip(rdev, 3);
7189 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7190 DRM_DEBUG("IH: D4 vblank\n");
7191 }
7192 break;
7193 case 1: /* D4 vline */
7194 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7195 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7196 DRM_DEBUG("IH: D4 vline\n");
7197 }
7198 break;
7199 default:
7200 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7201 break;
7202 }
7203 break;
7204 case 5: /* D5 vblank/vline */
7205 switch (src_data) {
7206 case 0: /* D5 vblank */
7207 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7208 if (rdev->irq.crtc_vblank_int[4]) {
7209 drm_handle_vblank(rdev->ddev, 4);
7210 rdev->pm.vblank_sync = true;
7211 wake_up(&rdev->irq.vblank_queue);
7212 }
7213 if (atomic_read(&rdev->irq.pflip[4]))
7214 radeon_crtc_handle_flip(rdev, 4);
7215 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7216 DRM_DEBUG("IH: D5 vblank\n");
7217 }
7218 break;
7219 case 1: /* D5 vline */
7220 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7221 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7222 DRM_DEBUG("IH: D5 vline\n");
7223 }
7224 break;
7225 default:
7226 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7227 break;
7228 }
7229 break;
7230 case 6: /* D6 vblank/vline */
7231 switch (src_data) {
7232 case 0: /* D6 vblank */
7233 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7234 if (rdev->irq.crtc_vblank_int[5]) {
7235 drm_handle_vblank(rdev->ddev, 5);
7236 rdev->pm.vblank_sync = true;
7237 wake_up(&rdev->irq.vblank_queue);
7238 }
7239 if (atomic_read(&rdev->irq.pflip[5]))
7240 radeon_crtc_handle_flip(rdev, 5);
7241 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7242 DRM_DEBUG("IH: D6 vblank\n");
7243 }
7244 break;
7245 case 1: /* D6 vline */
7246 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7247 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7248 DRM_DEBUG("IH: D6 vline\n");
7249 }
7250 break;
7251 default:
7252 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7253 break;
7254 }
7255 break;
7256 case 42: /* HPD hotplug */
7257 switch (src_data) {
7258 case 0:
7259 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7260 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7261 queue_hotplug = true;
7262 DRM_DEBUG("IH: HPD1\n");
7263 }
7264 break;
7265 case 1:
7266 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7267 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7268 queue_hotplug = true;
7269 DRM_DEBUG("IH: HPD2\n");
7270 }
7271 break;
7272 case 2:
7273 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7274 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7275 queue_hotplug = true;
7276 DRM_DEBUG("IH: HPD3\n");
7277 }
7278 break;
7279 case 3:
7280 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7281 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7282 queue_hotplug = true;
7283 DRM_DEBUG("IH: HPD4\n");
7284 }
7285 break;
7286 case 4:
7287 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7288 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7289 queue_hotplug = true;
7290 DRM_DEBUG("IH: HPD5\n");
7291 }
7292 break;
7293 case 5:
7294 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7295 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7296 queue_hotplug = true;
7297 DRM_DEBUG("IH: HPD6\n");
7298 }
7299 break;
7300 default:
7301 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7302 break;
7303 }
7304 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04007305 case 146:
7306 case 147:
Alex Deucher3ec7d112013-06-14 10:42:22 -04007307 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7308 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7309 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
Alex Deucher9d97c992012-09-06 14:24:48 -04007310 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7311 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04007312 addr);
Alex Deucher9d97c992012-09-06 14:24:48 -04007313 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04007314 status);
7315 cik_vm_decode_fault(rdev, status, addr, mc_client);
Alex Deucher9d97c992012-09-06 14:24:48 -04007316 /* reset addr and status */
7317 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7318 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05007319 case 176: /* GFX RB CP_INT */
7320 case 177: /* GFX IB CP_INT */
7321 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7322 break;
7323 case 181: /* CP EOP event */
7324 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04007325 /* XXX check the bitfield order! */
7326 me_id = (ring_id & 0x60) >> 5;
7327 pipe_id = (ring_id & 0x18) >> 3;
7328 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05007329 switch (me_id) {
7330 case 0:
7331 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7332 break;
7333 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05007334 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04007335 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7336 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7337 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7338 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05007339 break;
7340 }
7341 break;
7342 case 184: /* CP Privileged reg access */
7343 DRM_ERROR("Illegal register access in command stream\n");
7344 /* XXX check the bitfield order! */
7345 me_id = (ring_id & 0x60) >> 5;
7346 pipe_id = (ring_id & 0x18) >> 3;
7347 queue_id = (ring_id & 0x7) >> 0;
7348 switch (me_id) {
7349 case 0:
7350 /* This results in a full GPU reset, but all we need to do is soft
7351 * reset the CP for gfx
7352 */
7353 queue_reset = true;
7354 break;
7355 case 1:
7356 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007357 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007358 break;
7359 case 2:
7360 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007361 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007362 break;
7363 }
7364 break;
7365 case 185: /* CP Privileged inst */
7366 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04007367 /* XXX check the bitfield order! */
7368 me_id = (ring_id & 0x60) >> 5;
7369 pipe_id = (ring_id & 0x18) >> 3;
7370 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05007371 switch (me_id) {
7372 case 0:
7373 /* This results in a full GPU reset, but all we need to do is soft
7374 * reset the CP for gfx
7375 */
7376 queue_reset = true;
7377 break;
7378 case 1:
7379 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007380 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007381 break;
7382 case 2:
7383 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007384 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007385 break;
7386 }
7387 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04007388 case 224: /* SDMA trap event */
7389 /* XXX check the bitfield order! */
7390 me_id = (ring_id & 0x3) >> 0;
7391 queue_id = (ring_id & 0xc) >> 2;
7392 DRM_DEBUG("IH: SDMA trap\n");
7393 switch (me_id) {
7394 case 0:
7395 switch (queue_id) {
7396 case 0:
7397 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7398 break;
7399 case 1:
7400 /* XXX compute */
7401 break;
7402 case 2:
7403 /* XXX compute */
7404 break;
7405 }
7406 break;
7407 case 1:
7408 switch (queue_id) {
7409 case 0:
7410 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7411 break;
7412 case 1:
7413 /* XXX compute */
7414 break;
7415 case 2:
7416 /* XXX compute */
7417 break;
7418 }
7419 break;
7420 }
7421 break;
Alex Deucher41a524a2013-08-14 01:01:40 -04007422 case 230: /* thermal low to high */
7423 DRM_DEBUG("IH: thermal low to high\n");
7424 rdev->pm.dpm.thermal.high_to_low = false;
7425 queue_thermal = true;
7426 break;
7427 case 231: /* thermal high to low */
7428 DRM_DEBUG("IH: thermal high to low\n");
7429 rdev->pm.dpm.thermal.high_to_low = true;
7430 queue_thermal = true;
7431 break;
7432 case 233: /* GUI IDLE */
7433 DRM_DEBUG("IH: GUI idle\n");
7434 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04007435 case 241: /* SDMA Privileged inst */
7436 case 247: /* SDMA Privileged inst */
7437 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7438 /* XXX check the bitfield order! */
7439 me_id = (ring_id & 0x3) >> 0;
7440 queue_id = (ring_id & 0xc) >> 2;
7441 switch (me_id) {
7442 case 0:
7443 switch (queue_id) {
7444 case 0:
7445 queue_reset = true;
7446 break;
7447 case 1:
7448 /* XXX compute */
7449 queue_reset = true;
7450 break;
7451 case 2:
7452 /* XXX compute */
7453 queue_reset = true;
7454 break;
7455 }
7456 break;
7457 case 1:
7458 switch (queue_id) {
7459 case 0:
7460 queue_reset = true;
7461 break;
7462 case 1:
7463 /* XXX compute */
7464 queue_reset = true;
7465 break;
7466 case 2:
7467 /* XXX compute */
7468 queue_reset = true;
7469 break;
7470 }
7471 break;
7472 }
7473 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05007474 default:
7475 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7476 break;
7477 }
7478
7479 /* wptr/rptr are in bytes! */
7480 rptr += 16;
7481 rptr &= rdev->ih.ptr_mask;
7482 }
7483 if (queue_hotplug)
7484 schedule_work(&rdev->hotplug_work);
7485 if (queue_reset)
7486 schedule_work(&rdev->reset_work);
Alex Deucher41a524a2013-08-14 01:01:40 -04007487 if (queue_thermal)
7488 schedule_work(&rdev->pm.dpm.thermal.work);
Alex Deuchera59781b2012-11-09 10:45:57 -05007489 rdev->ih.rptr = rptr;
7490 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7491 atomic_set(&rdev->ih.lock, 0);
7492
7493 /* make sure wptr hasn't changed while processing */
7494 wptr = cik_get_ih_wptr(rdev);
7495 if (wptr != rptr)
7496 goto restart_ih;
7497
7498 return IRQ_HANDLED;
7499}
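
/* Illustrative note (not driver code): the atomic_xchg() on ih.lock above
 * acts as a non-blocking trylock so concurrent callers bail out with
 * IRQ_NONE, and the final re-read of the wptr followed by the goto back to
 * restart_ih closes the window where new vectors land after the loop has
 * drained the ring but before the lock is released.
 */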
Alex Deucher7bf94a22012-08-17 11:48:29 -04007500
7501/*
7502 * startup/shutdown callbacks
7503 */
7504/**
7505 * cik_startup - program the asic to a functional state
7506 *
7507 * @rdev: radeon_device pointer
7508 *
7509 * Programs the asic to a functional state (CIK).
7510 * Called by cik_init() and cik_resume().
7511 * Returns 0 for success, error for failure.
7512 */
7513static int cik_startup(struct radeon_device *rdev)
7514{
7515 struct radeon_ring *ring;
7516 int r;
7517
Alex Deucher8a7cd272013-08-06 11:29:39 -04007518 /* enable pcie gen2/3 link */
7519 cik_pcie_gen3_enable(rdev);
Alex Deucher7235711a42013-04-04 13:58:09 -04007520 /* enable aspm */
7521 cik_program_aspm(rdev);
Alex Deucher8a7cd272013-08-06 11:29:39 -04007522
Alex Deucher6fab3feb2013-08-04 12:13:17 -04007523 cik_mc_program(rdev);
7524
Alex Deucher7bf94a22012-08-17 11:48:29 -04007525 if (rdev->flags & RADEON_IS_IGP) {
7526 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7527 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7528 r = cik_init_microcode(rdev);
7529 if (r) {
7530 DRM_ERROR("Failed to load firmware!\n");
7531 return r;
7532 }
7533 }
7534 } else {
7535 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7536 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7537 !rdev->mc_fw) {
7538 r = cik_init_microcode(rdev);
7539 if (r) {
7540 DRM_ERROR("Failed to load firmware!\n");
7541 return r;
7542 }
7543 }
7544
7545 r = ci_mc_load_microcode(rdev);
7546 if (r) {
7547 DRM_ERROR("Failed to load MC firmware!\n");
7548 return r;
7549 }
7550 }
7551
7552 r = r600_vram_scratch_init(rdev);
7553 if (r)
7554 return r;
7555
Alex Deucher7bf94a22012-08-17 11:48:29 -04007556 r = cik_pcie_gart_enable(rdev);
7557 if (r)
7558 return r;
7559 cik_gpu_init(rdev);
7560
7561 /* allocate rlc buffers */
Alex Deucher22c775c2013-07-23 09:41:05 -04007562 if (rdev->flags & RADEON_IS_IGP) {
7563 if (rdev->family == CHIP_KAVERI) {
7564 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7565 rdev->rlc.reg_list_size =
7566 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7567 } else {
7568 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7569 rdev->rlc.reg_list_size =
7570 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7571 }
7572 }
7573 rdev->rlc.cs_data = ci_cs_data;
7574 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
Alex Deucher1fd11772013-04-17 17:53:50 -04007575 r = sumo_rlc_init(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007576 if (r) {
7577 DRM_ERROR("Failed to init rlc BOs!\n");
7578 return r;
7579 }
7580
7581 /* allocate wb buffer */
7582 r = radeon_wb_init(rdev);
7583 if (r)
7584 return r;
7585
Alex Deucher963e81f2013-06-26 17:37:11 -04007586 /* allocate mec buffers */
7587 r = cik_mec_init(rdev);
7588 if (r) {
7589 DRM_ERROR("Failed to init MEC BOs!\n");
7590 return r;
7591 }
7592
Alex Deucher7bf94a22012-08-17 11:48:29 -04007593 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7594 if (r) {
7595 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7596 return r;
7597 }
7598
Alex Deucher963e81f2013-06-26 17:37:11 -04007599 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7600 if (r) {
7601 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7602 return r;
7603 }
7604
7605 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7606 if (r) {
7607 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7608 return r;
7609 }
7610
Alex Deucher7bf94a22012-08-17 11:48:29 -04007611 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7612 if (r) {
7613 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7614 return r;
7615 }
7616
7617 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7618 if (r) {
7619 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7620 return r;
7621 }
7622
Alex Deucher5e884f62013-08-06 11:39:38 -04007623 r = radeon_uvd_resume(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007624 if (!r) {
Alex Deucher5e884f62013-08-06 11:39:38 -04007625 cik_uvd_resume(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007626 r = radeon_fence_driver_start_ring(rdev,
7627 R600_RING_TYPE_UVD_INDEX);
7628 if (r)
7629 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7630 }
7631 if (r)
7632 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7633
Alex Deucher7bf94a22012-08-17 11:48:29 -04007634 /* Enable IRQ */
7635 if (!rdev->irq.installed) {
7636 r = radeon_irq_kms_init(rdev);
7637 if (r)
7638 return r;
7639 }
7640
7641 r = cik_irq_init(rdev);
7642 if (r) {
7643 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7644 radeon_irq_kms_fini(rdev);
7645 return r;
7646 }
7647 cik_irq_set(rdev);
7648
7649 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7650 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7651 CP_RB0_RPTR, CP_RB0_WPTR,
7652 0, 0xfffff, RADEON_CP_PACKET2);
7653 if (r)
7654 return r;
7655
Alex Deucher963e81f2013-06-26 17:37:11 -04007656 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04007657 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04007658 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7659 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7660 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04007661 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04007662 if (r)
7663 return r;
7664 ring->me = 1; /* first MEC */
7665 ring->pipe = 0; /* first pipe */
7666 ring->queue = 0; /* first queue */
7667 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7668
Alex Deucher2615b532013-06-03 11:21:58 -04007669 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04007670 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7671 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7672 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04007673 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04007674 if (r)
7675 return r;
7676 /* dGPUs only have 1 MEC */
7677 ring->me = 1; /* first MEC */
7678 ring->pipe = 0; /* first pipe */
7679 ring->queue = 1; /* second queue */
7680 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7681
Alex Deucher7bf94a22012-08-17 11:48:29 -04007682 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7683 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7684 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7685 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7686 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7687 if (r)
7688 return r;
7689
7690 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7691 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7692 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7693 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7694 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7695 if (r)
7696 return r;
7697
7698 r = cik_cp_resume(rdev);
7699 if (r)
7700 return r;
7701
7702 r = cik_sdma_resume(rdev);
7703 if (r)
7704 return r;
7705
Christian König87167bb2013-04-09 13:39:21 -04007706 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7707 if (ring->ring_size) {
Christian König02c9f7f2013-08-13 11:56:51 +02007708 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
Christian König87167bb2013-04-09 13:39:21 -04007709 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7710 0, 0xfffff, RADEON_CP_PACKET2);
7711 if (!r)
Alex Deucher5e884f62013-08-06 11:39:38 -04007712 r = r600_uvd_init(rdev, true);
Christian König87167bb2013-04-09 13:39:21 -04007713 if (r)
7714 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7715 }
7716
Alex Deucher7bf94a22012-08-17 11:48:29 -04007717 r = radeon_ib_pool_init(rdev);
7718 if (r) {
7719 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7720 return r;
7721 }
7722
7723 r = radeon_vm_manager_init(rdev);
7724 if (r) {
7725 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7726 return r;
7727 }
7728
7729 return 0;
7730}
7731
7732/**
7733 * cik_resume - resume the asic to a functional state
7734 *
7735 * @rdev: radeon_device pointer
7736 *
7737 * Programs the asic to a functional state (CIK).
7738 * Called at resume.
7739 * Returns 0 for success, error for failure.
7740 */
7741int cik_resume(struct radeon_device *rdev)
7742{
7743 int r;
7744
7745 /* post card */
7746 atom_asic_init(rdev->mode_info.atom_context);
7747
Alex Deucher0aafd312013-04-09 14:43:30 -04007748 /* init golden registers */
7749 cik_init_golden_registers(rdev);
7750
Alex Deucher7bf94a22012-08-17 11:48:29 -04007751 rdev->accel_working = true;
7752 r = cik_startup(rdev);
7753 if (r) {
7754 DRM_ERROR("cik startup failed on resume\n");
7755 rdev->accel_working = false;
7756 return r;
7757 }
7758
7759 return r;
7761}
7762
7763/**
7764 * cik_suspend - suspend the asic
7765 *
7766 * @rdev: radeon_device pointer
7767 *
7768 * Bring the chip into a state suitable for suspend (CIK).
7769 * Called at suspend.
7770 * Returns 0 for success.
7771 */
7772int cik_suspend(struct radeon_device *rdev)
7773{
7774 radeon_vm_manager_fini(rdev);
7775 cik_cp_enable(rdev, false);
7776 cik_sdma_enable(rdev, false);
Christian König2858c002013-08-01 17:34:07 +02007777 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007778 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007779 cik_irq_suspend(rdev);
7780 radeon_wb_disable(rdev);
7781 cik_pcie_gart_disable(rdev);
7782 return 0;
7783}
7784
7785 /* The plan is to move initialization into this function and use
7786 * helper functions so that radeon_device_init does little more
7787 * than call asic-specific functions. This should also allow us
7788 * to remove a bunch of callbacks like vram_info.
7790 */
7791/**
7792 * cik_init - asic specific driver and hw init
7793 *
7794 * @rdev: radeon_device pointer
7795 *
7796 * Setup asic specific driver variables and program the hw
7797 * to a functional state (CIK).
7798 * Called at driver startup.
7799 * Returns 0 for success, errors for failure.
7800 */
7801int cik_init(struct radeon_device *rdev)
7802{
7803 struct radeon_ring *ring;
7804 int r;
7805
7806 /* Read BIOS */
7807 if (!radeon_get_bios(rdev)) {
7808 if (ASIC_IS_AVIVO(rdev))
7809 return -EINVAL;
7810 }
7811 /* Must be an ATOMBIOS */
7812 if (!rdev->is_atom_bios) {
7813 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7814 return -EINVAL;
7815 }
7816 r = radeon_atombios_init(rdev);
7817 if (r)
7818 return r;
7819
7820 /* Post card if necessary */
7821 if (!radeon_card_posted(rdev)) {
7822 if (!rdev->bios) {
7823 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7824 return -EINVAL;
7825 }
7826 DRM_INFO("GPU not posted. posting now...\n");
7827 atom_asic_init(rdev->mode_info.atom_context);
7828 }
Alex Deucher0aafd312013-04-09 14:43:30 -04007829 /* init golden registers */
7830 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007831 /* Initialize scratch registers */
7832 cik_scratch_init(rdev);
7833 /* Initialize surface registers */
7834 radeon_surface_init(rdev);
7835 /* Initialize clocks */
7836 radeon_get_clock_info(rdev->ddev);
7837
7838 /* Fence driver */
7839 r = radeon_fence_driver_init(rdev);
7840 if (r)
7841 return r;
7842
7843 /* initialize memory controller */
7844 r = cik_mc_init(rdev);
7845 if (r)
7846 return r;
7847 /* Memory manager */
7848 r = radeon_bo_init(rdev);
7849 if (r)
7850 return r;
7851
7852 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7853 ring->ring_obj = NULL;
7854 r600_ring_init(rdev, ring, 1024 * 1024);
7855
Alex Deucher963e81f2013-06-26 17:37:11 -04007856 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7857 ring->ring_obj = NULL;
7858 r600_ring_init(rdev, ring, 1024 * 1024);
7859 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7860 if (r)
7861 return r;
7862
7863 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7864 ring->ring_obj = NULL;
7865 r600_ring_init(rdev, ring, 1024 * 1024);
7866 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7867 if (r)
7868 return r;
7869
Alex Deucher7bf94a22012-08-17 11:48:29 -04007870 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7871 ring->ring_obj = NULL;
7872 r600_ring_init(rdev, ring, 256 * 1024);
7873
7874 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7875 ring->ring_obj = NULL;
7876 r600_ring_init(rdev, ring, 256 * 1024);
7877
Christian König87167bb2013-04-09 13:39:21 -04007878 r = radeon_uvd_init(rdev);
7879 if (!r) {
7880 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7881 ring->ring_obj = NULL;
7882 r600_ring_init(rdev, ring, 4096);
7883 }
7884
Alex Deucher7bf94a22012-08-17 11:48:29 -04007885 rdev->ih.ring_obj = NULL;
7886 r600_ih_ring_init(rdev, 64 * 1024);
7887
7888 r = r600_pcie_gart_init(rdev);
7889 if (r)
7890 return r;
7891
7892 rdev->accel_working = true;
7893 r = cik_startup(rdev);
7894 if (r) {
7895 dev_err(rdev->dev, "disabling GPU acceleration\n");
7896 cik_cp_fini(rdev);
7897 cik_sdma_fini(rdev);
7898 cik_irq_fini(rdev);
Alex Deucher1fd11772013-04-17 17:53:50 -04007899 sumo_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04007900 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007901 radeon_wb_fini(rdev);
7902 radeon_ib_pool_fini(rdev);
7903 radeon_vm_manager_fini(rdev);
7904 radeon_irq_kms_fini(rdev);
7905 cik_pcie_gart_fini(rdev);
7906 rdev->accel_working = false;
7907 }
7908
7909 /* Don't start up if the MC ucode is missing.
7910 * The default clocks and voltages before the MC ucode
7911 * is loaded are not sufficient for advanced operations.
7912 */
7913 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7914 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7915 return -EINVAL;
7916 }
7917
7918 return 0;
7919}
7920
7921/**
7922 * cik_fini - asic specific driver and hw fini
7923 *
7924 * @rdev: radeon_device pointer
7925 *
7926 * Tear down the asic specific driver variables and program the hw
7927 * to an idle state (CIK).
7928 * Called at driver unload.
7929 */
7930void cik_fini(struct radeon_device *rdev)
7931{
7932 cik_cp_fini(rdev);
7933 cik_sdma_fini(rdev);
7934 cik_irq_fini(rdev);
Alex Deucher1fd11772013-04-17 17:53:50 -04007935 sumo_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04007936 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007937 radeon_wb_fini(rdev);
7938 radeon_vm_manager_fini(rdev);
7939 radeon_ib_pool_fini(rdev);
7940 radeon_irq_kms_fini(rdev);
Christian König2858c002013-08-01 17:34:07 +02007941 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007942 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007943 cik_pcie_gart_fini(rdev);
7944 r600_vram_scratch_fini(rdev);
7945 radeon_gem_fini(rdev);
7946 radeon_fence_driver_fini(rdev);
7947 radeon_bo_fini(rdev);
7948 radeon_atombios_fini(rdev);
7949 kfree(rdev->bios);
7950 rdev->bios = NULL;
7951}
Alex Deuchercd84a272012-07-20 17:13:13 -04007952
7953/* display watermark setup */
7954/**
7955 * dce8_line_buffer_adjust - Set up the line buffer
7956 *
7957 * @rdev: radeon_device pointer
7958 * @radeon_crtc: the selected display controller
7959 * @mode: the current display mode on the selected display
7960 * controller
7961 *
7962 * Set up the line buffer allocation for
7963 * the selected display controller (CIK).
7964 * Returns the line buffer size in pixels.
7965 */
7966static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7967 struct radeon_crtc *radeon_crtc,
7968 struct drm_display_mode *mode)
7969{
7970 u32 tmp;
7971
7972 /*
7973 * Line Buffer Setup
7974 * There are 6 line buffers, one for each display controller.
7975 * There are 3 partitions per LB. Select the number of partitions
7976 * to enable based on the display width. For display widths larger
7977 * than 4096, you need to use 2 display controllers and combine
7978 * them using the stereo blender.
7979 */
7980 if (radeon_crtc->base.enabled && mode) {
7981 if (mode->crtc_hdisplay < 1920)
7982 tmp = 1;
7983 else if (mode->crtc_hdisplay < 2560)
7984 tmp = 2;
7985 else if (mode->crtc_hdisplay < 4096)
7986 tmp = 0;
7987 else {
7988 DRM_DEBUG_KMS("Mode too big for LB!\n");
7989 tmp = 0;
7990 }
7991 } else
7992 tmp = 1;
7993
7994 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7995 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7996
7997 if (radeon_crtc->base.enabled && mode) {
7998 switch (tmp) {
7999 case 0:
8000 default:
8001 return 4096 * 2;
8002 case 1:
8003 return 1920 * 2;
8004 case 2:
8005 return 2560 * 2;
8006 }
8007 }
8008
8009 /* controller not enabled, so no lb used */
8010 return 0;
8011}
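
/* Illustrative worked example (not driver code): a 1920x1080 mode has
 * crtc_hdisplay == 1920, which is not < 1920 but is < 2560, so tmp = 2
 * and the function returns 2560 * 2 = 5120 pixels of line buffer.
 */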
8012
8013/**
8014 * cik_get_number_of_dram_channels - get the number of dram channels
8015 *
8016 * @rdev: radeon_device pointer
8017 *
8018 * Look up the number of video ram channels (CIK).
8019 * Used for display watermark bandwidth calculations
8020 * Returns the number of dram channels
8021 */
8022static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8023{
8024 u32 tmp = RREG32(MC_SHARED_CHMAP);
8025
8026 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8027 case 0:
8028 default:
8029 return 1;
8030 case 1:
8031 return 2;
8032 case 2:
8033 return 4;
8034 case 3:
8035 return 8;
8036 case 4:
8037 return 3;
8038 case 5:
8039 return 6;
8040 case 6:
8041 return 10;
8042 case 7:
8043 return 12;
8044 case 8:
8045 return 16;
8046 }
8047}
8048
8049struct dce8_wm_params {
8050 u32 dram_channels; /* number of dram channels */
8051 u32 yclk; /* bandwidth per dram data pin in kHz */
8052 u32 sclk; /* engine clock in kHz */
8053 u32 disp_clk; /* display clock in kHz */
8054 u32 src_width; /* viewport width */
8055 u32 active_time; /* active display time in ns */
8056 u32 blank_time; /* blank time in ns */
8057 bool interlaced; /* mode is interlaced */
8058 fixed20_12 vsc; /* vertical scale ratio */
8059 u32 num_heads; /* number of active crtcs */
8060 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8061 u32 lb_size; /* line buffer allocated to pipe */
8062 u32 vtaps; /* vertical scaler taps */
8063};
8064
8065/**
8066 * dce8_dram_bandwidth - get the dram bandwidth
8067 *
8068 * @wm: watermark calculation data
8069 *
8070 * Calculate the raw dram bandwidth (CIK).
8071 * Used for display watermark bandwidth calculations
8072 * Returns the dram bandwidth in MBytes/s
8073 */
8074static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8075{
8076 /* Calculate raw DRAM Bandwidth */
8077 fixed20_12 dram_efficiency; /* 0.7 */
8078 fixed20_12 yclk, dram_channels, bandwidth;
8079 fixed20_12 a;
8080
8081 a.full = dfixed_const(1000);
8082 yclk.full = dfixed_const(wm->yclk);
8083 yclk.full = dfixed_div(yclk, a);
8084 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8085 a.full = dfixed_const(10);
8086 dram_efficiency.full = dfixed_const(7);
8087 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8088 bandwidth.full = dfixed_mul(dram_channels, yclk);
8089 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8090
8091 return dfixed_trunc(bandwidth);
8092}
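
/* Illustrative arithmetic (not driver code): with yclk = 1,000,000 kHz
 * (1 GHz per dram data pin) and 2 dram channels, the formula above yields
 * (2 * 4) bytes/clk * 1000 MHz * 0.7 = 5600 MBytes/s.
 */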
8093
8094/**
8095 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8096 *
8097 * @wm: watermark calculation data
8098 *
8099 * Calculate the dram bandwidth used for display (CIK).
8100 * Used for display watermark bandwidth calculations
8101 * Returns the dram bandwidth for display in MBytes/s
8102 */
8103static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8104{
8105 /* Calculate DRAM Bandwidth and the part allocated to display. */
8106 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8107 fixed20_12 yclk, dram_channels, bandwidth;
8108 fixed20_12 a;
8109
8110 a.full = dfixed_const(1000);
8111 yclk.full = dfixed_const(wm->yclk);
8112 yclk.full = dfixed_div(yclk, a);
8113 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8114 a.full = dfixed_const(10);
8115 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8116 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8117 bandwidth.full = dfixed_mul(dram_channels, yclk);
8118 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8119
8120 return dfixed_trunc(bandwidth);
8121}
8122
8123/**
8124 * dce8_data_return_bandwidth - get the data return bandwidth
8125 *
8126 * @wm: watermark calculation data
8127 *
8128 * Calculate the data return bandwidth used for display (CIK).
8129 * Used for display watermark bandwidth calculations
8130 * Returns the data return bandwidth in MBytes/s
8131 */
8132static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8133{
8134 /* Calculate the display Data return Bandwidth */
8135 fixed20_12 return_efficiency; /* 0.8 */
8136 fixed20_12 sclk, bandwidth;
8137 fixed20_12 a;
8138
8139 a.full = dfixed_const(1000);
8140 sclk.full = dfixed_const(wm->sclk);
8141 sclk.full = dfixed_div(sclk, a);
8142 a.full = dfixed_const(10);
8143 return_efficiency.full = dfixed_const(8);
8144 return_efficiency.full = dfixed_div(return_efficiency, a);
8145 a.full = dfixed_const(32);
8146 bandwidth.full = dfixed_mul(a, sclk);
8147 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8148
8149 return dfixed_trunc(bandwidth);
8150}
8151
8152/**
8153 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8154 *
8155 * @wm: watermark calculation data
8156 *
8157 * Calculate the dmif bandwidth used for display (CIK).
8158 * Used for display watermark bandwidth calculations
8159 * Returns the dmif bandwidth in MBytes/s
8160 */
8161static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8162{
8163 /* Calculate the DMIF Request Bandwidth */
8164 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8165 fixed20_12 disp_clk, bandwidth;
8166 fixed20_12 a, b;
8167
8168 a.full = dfixed_const(1000);
8169 disp_clk.full = dfixed_const(wm->disp_clk);
8170 disp_clk.full = dfixed_div(disp_clk, a);
8171 a.full = dfixed_const(32);
8172 b.full = dfixed_mul(a, disp_clk);
8173
8174 a.full = dfixed_const(10);
8175 disp_clk_request_efficiency.full = dfixed_const(8);
8176 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8177
8178 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8179
8180 return dfixed_trunc(bandwidth);
8181}
8182
8183/**
8184 * dce8_available_bandwidth - get the min available bandwidth
8185 *
8186 * @wm: watermark calculation data
8187 *
8188 * Calculate the min available bandwidth used for display (CIK).
8189 * Used for display watermark bandwidth calculations
8190 * Returns the min available bandwidth in MBytes/s
8191 */
8192static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8193{
8194 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8195 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8196 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8197 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8198
8199 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8200}
8201
8202/**
8203 * dce8_average_bandwidth - get the average available bandwidth
8204 *
8205 * @wm: watermark calculation data
8206 *
8207 * Calculate the average available bandwidth used for display (CIK).
8208 * Used for display watermark bandwidth calculations
8209 * Returns the average available bandwidth in MBytes/s
8210 */
8211static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8212{
8213 /* Calculate the display mode Average Bandwidth
8214 * DisplayMode should contain the source and destination dimensions,
8215 * timing, etc.
8216 */
8217 fixed20_12 bpp;
8218 fixed20_12 line_time;
8219 fixed20_12 src_width;
8220 fixed20_12 bandwidth;
8221 fixed20_12 a;
8222
8223 a.full = dfixed_const(1000);
8224 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8225 line_time.full = dfixed_div(line_time, a);
8226 bpp.full = dfixed_const(wm->bytes_per_pixel);
8227 src_width.full = dfixed_const(wm->src_width);
8228 bandwidth.full = dfixed_mul(src_width, bpp);
8229 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8230 bandwidth.full = dfixed_div(bandwidth, line_time);
8231
8232 return dfixed_trunc(bandwidth);
8233}
8234
8235/**
8236 * dce8_latency_watermark - get the latency watermark
8237 *
8238 * @wm: watermark calculation data
8239 *
8240 * Calculate the latency watermark (CIK).
8241 * Used for display watermark bandwidth calculations
8242 * Returns the latency watermark in ns
8243 */
8244static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8245{
8246 /* First calculate the latency in ns */
8247 u32 mc_latency = 2000; /* 2000 ns. */
8248 u32 available_bandwidth = dce8_available_bandwidth(wm);
8249 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8250 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8251 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8252 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8253 (wm->num_heads * cursor_line_pair_return_time);
8254 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8255 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8256 u32 tmp, dmif_size = 12288;
8257 fixed20_12 a, b, c;
8258
8259 if (wm->num_heads == 0)
8260 return 0;
8261
8262 a.full = dfixed_const(2);
8263 b.full = dfixed_const(1);
8264 if ((wm->vsc.full > a.full) ||
8265 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8266 (wm->vtaps >= 5) ||
8267 ((wm->vsc.full >= a.full) && wm->interlaced))
8268 max_src_lines_per_dst_line = 4;
8269 else
8270 max_src_lines_per_dst_line = 2;
8271
8272 a.full = dfixed_const(available_bandwidth);
8273 b.full = dfixed_const(wm->num_heads);
8274 a.full = dfixed_div(a, b);
8275
8276 b.full = dfixed_const(mc_latency + 512);
8277 c.full = dfixed_const(wm->disp_clk);
8278 b.full = dfixed_div(b, c);
8279
8280 c.full = dfixed_const(dmif_size);
8281 b.full = dfixed_div(c, b);
8282
8283 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8284
8285 b.full = dfixed_const(1000);
8286 c.full = dfixed_const(wm->disp_clk);
8287 b.full = dfixed_div(c, b);
8288 c.full = dfixed_const(wm->bytes_per_pixel);
8289 b.full = dfixed_mul(b, c);
8290
8291 lb_fill_bw = min(tmp, dfixed_trunc(b));
8292
8293 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8294 b.full = dfixed_const(1000);
8295 c.full = dfixed_const(lb_fill_bw);
8296 b.full = dfixed_div(c, b);
8297 a.full = dfixed_div(a, b);
8298 line_fill_time = dfixed_trunc(a);
8299
8300 if (line_fill_time < wm->active_time)
8301 return latency;
8302 else
8303 return latency + (line_fill_time - wm->active_time);
8305}
8306
8307/**
8308 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8309 * average and available dram bandwidth
8310 *
8311 * @wm: watermark calculation data
8312 *
8313 * Check if the display average bandwidth fits in the display
8314 * dram bandwidth (CIK).
8315 * Used for display watermark bandwidth calculations
8316 * Returns true if the display fits, false if not.
8317 */
8318static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8319{
8320 if (dce8_average_bandwidth(wm) <=
8321 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8322 return true;
8323 else
8324 return false;
8325}
8326
8327/**
8328 * dce8_average_bandwidth_vs_available_bandwidth - check
8329 * average and available bandwidth
8330 *
8331 * @wm: watermark calculation data
8332 *
8333 * Check if the display average bandwidth fits in the display
8334 * available bandwidth (CIK).
8335 * Used for display watermark bandwidth calculations
8336 * Returns true if the display fits, false if not.
8337 */
8338static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8339{
8340 if (dce8_average_bandwidth(wm) <=
8341 (dce8_available_bandwidth(wm) / wm->num_heads))
8342 return true;
8343 else
8344 return false;
8345}
8346
8347/**
8348 * dce8_check_latency_hiding - check latency hiding
8349 *
8350 * @wm: watermark calculation data
8351 *
8352 * Check latency hiding (CIK).
8353 * Used for display watermark bandwidth calculations
8354 * Returns true if the display fits, false if not.
8355 */
8356static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8357{
8358 u32 lb_partitions = wm->lb_size / wm->src_width;
8359 u32 line_time = wm->active_time + wm->blank_time;
8360 u32 latency_tolerant_lines;
8361 u32 latency_hiding;
8362 fixed20_12 a;
8363
8364 a.full = dfixed_const(1);
8365 if (wm->vsc.full > a.full)
8366 latency_tolerant_lines = 1;
8367 else {
8368 if (lb_partitions <= (wm->vtaps + 1))
8369 latency_tolerant_lines = 1;
8370 else
8371 latency_tolerant_lines = 2;
8372 }
8373
8374 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8375
8376 if (dce8_latency_watermark(wm) <= latency_hiding)
8377 return true;
8378 else
8379 return false;
8380}
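
/* Illustrative worked example (not driver code): with lb_size = 5120,
 * src_width = 1920 and vtaps = 1, lb_partitions = 5120 / 1920 = 2 and
 * 2 <= (vtaps + 1), so only one latency tolerant line is assumed and
 * latency_hiding reduces to one line time plus the blank time.
 */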
8381
8382/**
8383 * dce8_program_watermarks - program display watermarks
8384 *
8385 * @rdev: radeon_device pointer
8386 * @radeon_crtc: the selected display controller
8387 * @lb_size: line buffer size
8388 * @num_heads: number of display controllers in use
8389 *
8390 * Calculate and program the display watermarks for the
8391 * selected display controller (CIK).
8392 */
8393static void dce8_program_watermarks(struct radeon_device *rdev,
8394 struct radeon_crtc *radeon_crtc,
8395 u32 lb_size, u32 num_heads)
8396{
8397 struct drm_display_mode *mode = &radeon_crtc->base.mode;
Alex Deucher58ea2de2013-01-24 10:03:39 -05008398 struct dce8_wm_params wm_low, wm_high;
Alex Deuchercd84a272012-07-20 17:13:13 -04008399 u32 pixel_period;
8400 u32 line_time = 0;
8401 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8402 u32 tmp, wm_mask;
8403
8404 if (radeon_crtc->base.enabled && num_heads && mode) {
8405 pixel_period = 1000000 / (u32)mode->clock;
8406 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8407
Alex Deucher58ea2de2013-01-24 10:03:39 -05008408 /* watermark for high clocks */
8409 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8410 rdev->pm.dpm_enabled) {
8411 wm_high.yclk =
8412 radeon_dpm_get_mclk(rdev, false) * 10;
8413 wm_high.sclk =
8414 radeon_dpm_get_sclk(rdev, false) * 10;
8415 } else {
8416 wm_high.yclk = rdev->pm.current_mclk * 10;
8417 wm_high.sclk = rdev->pm.current_sclk * 10;
8418 }
8419
8420 wm_high.disp_clk = mode->clock;
8421 wm_high.src_width = mode->crtc_hdisplay;
8422 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8423 wm_high.blank_time = line_time - wm_high.active_time;
8424 wm_high.interlaced = false;
Alex Deuchercd84a272012-07-20 17:13:13 -04008425 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
Alex Deucher58ea2de2013-01-24 10:03:39 -05008426 wm_high.interlaced = true;
8427 wm_high.vsc = radeon_crtc->vsc;
8428 wm_high.vtaps = 1;
Alex Deuchercd84a272012-07-20 17:13:13 -04008429 if (radeon_crtc->rmx_type != RMX_OFF)
Alex Deucher58ea2de2013-01-24 10:03:39 -05008430 wm_high.vtaps = 2;
8431 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8432 wm_high.lb_size = lb_size;
8433 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8434 wm_high.num_heads = num_heads;
Alex Deuchercd84a272012-07-20 17:13:13 -04008435
8436 /* set for high clocks */
Alex Deucher58ea2de2013-01-24 10:03:39 -05008437 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
Alex Deuchercd84a272012-07-20 17:13:13 -04008438
8439 /* possibly force display priority to high */
8440 /* should really do this at mode validation time... */
Alex Deucher58ea2de2013-01-24 10:03:39 -05008441 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8442 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8443 !dce8_check_latency_hiding(&wm_high) ||
8444 (rdev->disp_priority == 2)) {
8445 DRM_DEBUG_KMS("force priority to high\n");
8446 }
8447
8448 /* watermark for low clocks */
8449 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8450 rdev->pm.dpm_enabled) {
8451 wm_low.yclk =
8452 radeon_dpm_get_mclk(rdev, true) * 10;
8453 wm_low.sclk =
8454 radeon_dpm_get_sclk(rdev, true) * 10;
8455 } else {
8456 wm_low.yclk = rdev->pm.current_mclk * 10;
8457 wm_low.sclk = rdev->pm.current_sclk * 10;
8458 }
8459
8460 wm_low.disp_clk = mode->clock;
8461 wm_low.src_width = mode->crtc_hdisplay;
8462 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8463 wm_low.blank_time = line_time - wm_low.active_time;
8464 wm_low.interlaced = false;
8465 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8466 wm_low.interlaced = true;
8467 wm_low.vsc = radeon_crtc->vsc;
8468 wm_low.vtaps = 1;
8469 if (radeon_crtc->rmx_type != RMX_OFF)
8470 wm_low.vtaps = 2;
8471 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8472 wm_low.lb_size = lb_size;
8473 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8474 wm_low.num_heads = num_heads;
8475
8476 /* set for low clocks */
8477 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8478
8479 /* possibly force display priority to high */
8480 /* should really do this at mode validation time... */
8481 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8482 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8483 !dce8_check_latency_hiding(&wm_low) ||
Alex Deuchercd84a272012-07-20 17:13:13 -04008484 (rdev->disp_priority == 2)) {
8485 DRM_DEBUG_KMS("force priority to high\n");
8486 }
8487 }
8488
8489 /* select wm A */
8490 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8491 tmp = wm_mask;
8492 tmp &= ~LATENCY_WATERMARK_MASK(3);
8493 tmp |= LATENCY_WATERMARK_MASK(1);
8494 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8495 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8496 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8497 LATENCY_HIGH_WATERMARK(line_time)));
8498 /* select wm B */
8499 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8500 tmp &= ~LATENCY_WATERMARK_MASK(3);
8501 tmp |= LATENCY_WATERMARK_MASK(2);
8502 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8503 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8504 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8505 LATENCY_HIGH_WATERMARK(line_time)));
8506 /* restore original selection */
8507 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
Alex Deucher58ea2de2013-01-24 10:03:39 -05008508
8509 /* save values for DPM */
8510 radeon_crtc->line_time = line_time;
8511 radeon_crtc->wm_high = latency_watermark_a;
8512 radeon_crtc->wm_low = latency_watermark_b;
Alex Deuchercd84a272012-07-20 17:13:13 -04008513}
8514
8515/**
8516 * dce8_bandwidth_update - program display watermarks
8517 *
8518 * @rdev: radeon_device pointer
8519 *
8520 * Calculate and program the display watermarks and line
8521 * buffer allocation (CIK).
8522 */
8523void dce8_bandwidth_update(struct radeon_device *rdev)
8524{
8525 struct drm_display_mode *mode = NULL;
8526 u32 num_heads = 0, lb_size;
8527 int i;
8528
8529 radeon_update_display_priority(rdev);
8530
8531 for (i = 0; i < rdev->num_crtc; i++) {
8532 if (rdev->mode_info.crtcs[i]->base.enabled)
8533 num_heads++;
8534 }
8535 for (i = 0; i < rdev->num_crtc; i++) {
8536 mode = &rdev->mode_info.crtcs[i]->base.mode;
8537 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8538 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8539 }
8540}
Alex Deucher44fa3462012-12-18 22:17:00 -05008541
8542/**
8543 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8544 *
8545 * @rdev: radeon_device pointer
8546 *
8547 * Fetches a GPU clock counter snapshot (CIK).
8548 * Returns the 64 bit clock counter snapshot.
8549 */
8550uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8551{
8552 uint64_t clock;
8553
8554 mutex_lock(&rdev->gpu_clock_mutex);
8555 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8556 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8557 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8558 mutex_unlock(&rdev->gpu_clock_mutex);
8559 return clock;
8560}
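
/* Illustrative use (not driver code): sampling the counter twice to
 * estimate GPU clocks elapsed over a fixed delay. The function name,
 * the 10 ms delay and the printout are arbitrary choices for this sketch.
 */
static inline void cik_gpu_clock_rate_example(struct radeon_device *rdev)
{
	uint64_t start = cik_get_gpu_clock_counter(rdev);

	mdelay(10);
	DRM_DEBUG("GPU clocks in ~10ms: %llu\n",
		  (unsigned long long)(cik_get_gpu_clock_counter(rdev) - start));
}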
8561
Christian König87167bb2013-04-09 13:39:21 -04008562static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8563 u32 cntl_reg, u32 status_reg)
8564{
8565 int r, i;
8566 struct atom_clock_dividers dividers;
8567 uint32_t tmp;
8568
8569 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8570 clock, false, &dividers);
8571 if (r)
8572 return r;
8573
8574 tmp = RREG32_SMC(cntl_reg);
8575 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8576 tmp |= dividers.post_divider;
8577 WREG32_SMC(cntl_reg, tmp);
8578
8579 for (i = 0; i < 100; i++) {
8580 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8581 break;
8582 mdelay(10);
8583 }
8584 if (i == 100)
8585 return -ETIMEDOUT;
8586
8587 return 0;
8588}
8589
8590int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8591{
8592 int r = 0;
8593
8594 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8595 if (r)
8596 return r;
8597
8598 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8599 return r;
8600}
8601
Alex Deucher77df5082013-08-09 10:02:40 -04008602void cik_uvd_resume(struct radeon_device *rdev)
Christian König87167bb2013-04-09 13:39:21 -04008603{
8604 uint64_t addr;
8605 uint32_t size;
Christian König87167bb2013-04-09 13:39:21 -04008606
8607 /* program the VCPU memory controller bits 0-27 */
8608 addr = rdev->uvd.gpu_addr >> 3;
Christian König4ad9c1c2013-08-05 14:10:55 +02008609 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
Christian König87167bb2013-04-09 13:39:21 -04008610 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
8611 WREG32(UVD_VCPU_CACHE_SIZE0, size);
8612
8613 addr += size;
8614 size = RADEON_UVD_STACK_SIZE >> 3;
8615 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
8616 WREG32(UVD_VCPU_CACHE_SIZE1, size);
8617
8618 addr += size;
8619 size = RADEON_UVD_HEAP_SIZE >> 3;
8620 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
8621 WREG32(UVD_VCPU_CACHE_SIZE2, size);
8622
8623 /* bits 28-31 */
8624 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
8625 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
8626
8627 /* bits 32-39 */
8628 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
8629 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
8630
Alex Deucher8a7cd272013-08-06 11:29:39 -04008632
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

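			/* widen the link back to the maximum detected width
			 * before retraining, if renegotiation is supported
			 */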
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

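			/* quiesce the link and redo equalization, restoring
			 * the saved link control settings on each pass
			 */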
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the speed change to complete */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}

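/**
 * cik_program_aspm - tune PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Sets up the L0s/L1 inactivity timers, PLL power-down in L1, dynamic
 * lane power states and, when CLKREQ# can be used, moves a number of
 * auxiliary clocks off the PCIE reference clock so it can be gated.
 * Skipped for IGPs, non-PCIE boards, and when the radeon.aspm module
 * parameter is set to 0.
 */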
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

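			/* CLKREQ# based power management can only be used if
			 * the root port advertises clock power management in
			 * its link capabilities
			 */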
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

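	/* if the detected N_FTS field is saturated and the link is
	 * lane-reversed, clear the L0s inactivity timeout again,
	 * leaving L0s disabled
	 */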
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}