/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

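/*
 * Note: MODULE_FIRMWARE() only records the blob names in the module's
 * metadata (visible with `modinfo radeon`) so that initramfs generators
 * and packaging tools can bundle the files; the blobs themselves are
 * fetched at init time via request_firmware() in cik_init_microcode()
 * below.
 */
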
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
void cik_uvd_resume(struct radeon_device *rdev);

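/*
 * Temperature decoding note (a reading of the code below, not vendor
 * documentation): the CTF_TEMP field is treated as a 9-bit value in
 * degrees C.  If bit 9 (0x200) is set, the reading is clamped to 255 C;
 * otherwise the low 9 bits are used directly.  E.g. a raw field of
 * 0x05a is 90 C and is returned as 90000 millidegrees.
 */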
/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect register accessors
 */
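/*
 * The PCIE port registers are not directly mapped; they are reached
 * through an index/data pair.  The dummy reads of PCIE_INDEX below
 * flush the posted index write before the data access.  A caller would
 * typically go through the radeon wrappers, e.g. (illustrative, names
 * per the radeon convention rather than this file):
 *
 *	v = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
 *	WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, v);
 */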
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

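/*
 * RLC save/restore register lists.  The driver does not interpret these
 * tables itself; sumo_rlc_init() copies rdev->rlc.reg_list verbatim into
 * the RLC save/restore buffer for the RLC microcode to consume.  From
 * inspection of the data, most of the table is pairs of
 *
 *	(instance_select << 16) | (register_byte_offset >> 2),
 *	0x00000000,	// placeholder, presumably filled in on save
 *
 * where 0x0e00 appears to broadcast to all instances and 0x4e00,
 * 0x5e00, ... select individual instances; the bare small words (0x3,
 * 0x5) look like markers introducing the runs that follow.  This is
 * inferred from the layout, not taken from documentation.
 */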
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

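/*
 * "Golden" register tables: triples of { offset, and_mask, or_mask }
 * consumed by radeon_program_register_sequence(), which does a
 * read-modify-write of each register, roughly:
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= or_mask;
 *	WREG32(reg, tmp);
 *
 * (an and_mask of 0xffffffff is treated as a straight write of or_mask).
 */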
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
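
/*
 * Example (illustrative numbers): radeon stores clocks in 10 kHz units,
 * so with a 100 MHz reference (reference_freq == 10000) an IGP with
 * GPU_COUNTER_CLK set would return 5000 (50 MHz) above.  The actual
 * reference value comes from the vbios.
 */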

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

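/*
 * A sketch of how the doorbell helpers above are used elsewhere in the
 * driver (via the RDOORBELL32/WDOORBELL32 wrappers in radeon.h): a ring
 * backed by a doorbell publishes its new write pointer with something
 * like
 *
 *	WDOORBELL32(ring->doorbell_offset, ring->wptr);
 *
 * which lets the engine pick up the update without a conventional MMIO
 * register write (field names per the radeon ring code of this era).
 */
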
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

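/*
 * Each row above is an { index, data } pair: ci_mc_load_microcode()
 * below writes the first word to MC_SEQ_IO_DEBUG_INDEX and the second
 * to MC_SEQ_IO_DEBUG_DATA before streaming in the MC ucode proper.
 */
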
/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

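/*
 * Typical usage of cik_srbm_select() elsewhere in this file is to
 * bracket accesses to instanced registers, under rdev->srbm_mutex so
 * concurrent callers cannot change the selection underneath each other:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program per-queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */
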
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load the ucode when the MC sequencer is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0; /* a missing SMC image is not fatal */
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

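	/*
	 * Each GB_TILE_MODEn dword programmed below is built by OR-ing
	 * field macros from cikd.h (ARRAY_MODE, PIPE_CONFIG, TILE_SPLIT,
	 * MICRO_TILE_MODE_NEW, SAMPLE_SPLIT); e.g. mode 0 packs
	 * 2D-thin-1 tiling with an 8-pipe config and a 64B tile split.
	 * The same values are mirrored into
	 * rdev->config.cik.tile_mode_array[] so they can be handed to
	 * userspace through the radeon info ioctl.
	 */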
1745 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1746 if (num_pipe_configs > 8)
1747 num_pipe_configs = 8; /* ??? */
1748
	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}

/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address.  Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}
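
/*
 * Typical usage: select one SE/SH pair, access the instanced registers,
 * then restore broadcast with cik_select_se_sh(rdev, 0xffffffff, 0xffffffff)
 * so that subsequent register writes reach every instance again (see
 * cik_setup_rb() below for an example).
 */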

/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}
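
/*
 * For example, cik_create_bitmask(4) returns 0xf.  This is equivalent to
 * (1 << bit_width) - 1, but the loop form stays well defined for
 * bit_width == 32, where the single 32-bit shift would be undefined.
 */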

/**
 * cik_get_rb_disabled - compute the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}

/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

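	/*
	 * enabled_rbs now holds one bit per RB.  The loop below consumes it
	 * two bits at a time and packs the matching RASTER_CONFIG_RB_MAP_*
	 * code into two-bit fields of PA_SC_RASTER_CONFIG for each SE.
	 */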
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}

/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
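	/*
	 * NOOFCOLS encodes the row as 4 * 2^(8 + tmp) bytes: tmp=0 -> 1 KB,
	 * tmp=1 -> 2 KB, tmp=2 -> 4 KB.  Larger rows are clamped to the
	 * 4 KB maximum the tiling tables understand.
	 */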
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
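	/*
	 * Worked example (assuming the usual 0 = 256 byte pipe interleave
	 * encoding): 4 pipes, banked DRAM, 256 byte interleave and 2 KB
	 * rows give tile_config = 0x1012, i.e. row_size=1, group_size=0,
	 * num_banks=1, num_pipes=2 in the nibble layout described above.
	 */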

	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}

/*
 * GPU scratch register helper functions.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}

/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume().
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
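	/* Overwrite the scratch register with 0xDEADBEEF through the ring.
	 * SET_UCONFIG_REG takes a dword offset relative to the UCONFIG
	 * register space, hence the (reg - START) >> 2 conversion.
	 */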
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}

/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}

/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
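
/*
 * Note the packet difference: the compute CP uses RELEASE_MEM, which takes
 * the DATA_SEL/INT_SEL dword before the address pair, while the gfx path
 * above encodes the same information in an EVENT_WRITE_EOP packet.
 */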

void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}

/*
 * IB stuff
 */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
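		/* next_rptr skips past the rptr write-back packet emitted
		 * below (3 dwords for SET_UCONFIG_REG, 5 for WRITE_DATA)
		 * plus the 4 dword INDIRECT_BUFFER packet that follows it.
		 */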
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}

/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		/* don't leak the scratch reg or the IB on failure */
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}

/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
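/*
 * That works out to 4 pipes * 8 queues = 32 queues per MEC, or up to 64
 * user-visible compute queues on asics that implement both MECs (MEC2
 * ucode is only loaded on Kaveri, see cik_cp_compute_load_microcode()).
 */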
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	udelay(50);
}

/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}

/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}

/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
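	/* rb_bufsz is log2 of the ring size in 8 byte units (a 1 MB ring
	 * gives 17); the field in bits 15:8 is derived the same way from
	 * the GPU page size (4 KB -> 9).
	 */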
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
3400
Alex Deucher963e81f2013-06-26 17:37:11 -04003401u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3402 struct radeon_ring *ring)
3403{
3404 u32 rptr;
3405
3406
3407
3408 if (rdev->wb.enabled) {
3409 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3410 } else {
Alex Deucherf61d5b462013-08-06 12:40:16 -04003411 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003412 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3413 rptr = RREG32(CP_HQD_PQ_RPTR);
3414 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04003415 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003416 }
Alex Deucher963e81f2013-06-26 17:37:11 -04003417
3418 return rptr;
3419}
3420
u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
			      struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}

void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
	WDOORBELL32(ring->doorbell_offset, ring->wptr);
}

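/*
 * Note on the accessors above: the compute queue pointers live in the
 * CP_HQD_* registers, which are banked per-queue behind the SRBM, so a
 * raw register access needs cik_srbm_select() under srbm_mutex.  When
 * write-back is enabled the driver reads the shadow copy in system
 * memory instead, and wptr updates go through the queue's doorbell
 * rather than a direct register write, which avoids the SRBM dance in
 * the hot path.
 */
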
/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
	udelay(50);
}

/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}

/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}

/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}

static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

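/*
 * Example sizing (illustrative, not from the original source): with
 * MEC_HPD_SIZE = 2048, the HPD EOP buffer allocated in cik_mec_init()
 * below comes to num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, i.e.
 * 2 * 4 * 2048 * 2 = 32 KiB on Kaveri and 16 KiB on Bonaire/Kabini,
 * one MEC_HPD_SIZE * 2 slice per pipe.
 */
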
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}

struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};

struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};

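/*
 * Illustrative sketch (not part of the original driver): the MQD is the
 * memory image of a compute queue; the queue_state member mirrors the
 * CP_HQD_* and CP_MQD_* registers programmed in cik_cp_compute_resume()
 * below.  Since the MQD is allocated there as a single page-aligned BO
 * of sizeof(struct bonaire_mqd) bytes, the structure is assumed to fit
 * in one page, which a compile-time check can document:
 */
static inline void bonaire_mqd_size_check(void)
{
	/* illustrative assumption: one page is enough for the MQD */
	BUILD_BUG_ON(sizeof(struct bonaire_mqd) > PAGE_SIZE);
}
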
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);

		cik_srbm_select(rdev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= drm_order(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, NULL,
					     &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* doorbell offset */
		rdev->ring[idx].doorbell_offset =
			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* use a separate counter here so the outer queue
			 * loop index is not clobbered */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			drm_order(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);
		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}

static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}

static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}

static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}

static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset all cp blocks */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	return 0;
}

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP.  sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
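
/*
 * Illustrative sketch (not part of the original driver): the smallest
 * useful sDMA packet, a linear write of a single dword to a GPU
 * address.  It mirrors the packet that cik_sdma_ring_test() below puts
 * on the ring: a SDMA_PACKET() header followed by the destination
 * address, a dword count, and the payload.  gpu_addr is assumed to be
 * dword aligned.
 */
static inline void cik_sdma_example_write_dword(struct radeon_ring *ring,
						u64 gpu_addr, u32 value)
{
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
					    SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, lower_32_bits(gpu_addr)); /* dst addr lo */
	radeon_ring_write(ring, upper_32_bits(gpu_addr)); /* dst addr hi */
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, value);
}
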
/**
 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (CIK).
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
			      struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 5;
		while ((next_rptr & 7) != 4)
			next_rptr++;
		next_rptr += 4;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
		radeon_ring_write(ring, 1); /* number of DWs to follow */
		radeon_ring_write(ring, next_rptr);
	}

	/* IB packet must end on an 8 DW boundary */
	while ((ring->wptr & 7) != 4)
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, ib->length_dw);
}

/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write the fence seq number,
 * and a DMA trap packet to generate an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* write the fence */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	radeon_ring_write(ring, fence->seq);
	/* generate an interrupt */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
}

/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (CIK).
 */
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
	radeon_ring_write(ring, addr & 0xfffffff8);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
}

/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
	u32 rb_cntl, reg_offset;
	int i;

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
		rb_cntl &= ~SDMA_RB_ENABLE;
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
	}
}

/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}

/**
 * cik_sdma_enable - enable/disable the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}
}

/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}

/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->sdma_fw)
		return -EINVAL;

	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

	/* sdma0 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	/* sdma1 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	return 0;
}

/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);
	RREG32(SRBM_SOFT_RESET);

	r = cik_sdma_load_microcode(rdev);
	if (r)
		return r;

	/* unhalt the MEs */
	cik_sdma_enable(rdev, true);

	/* start the gfx rings and rlc compute queues */
	r = cik_sdma_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_sdma_rlc_resume(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
static void cik_sdma_fini(struct radeon_device *rdev)
{
	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);
	/* halt the MEs */
	cik_sdma_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
	/* XXX - compute dma queue tear down */
}

/**
 * cik_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU pages using the DMA engine (CIK).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int cik_copy_dma(struct radeon_device *rdev,
		 uint64_t src_offset, uint64_t dst_offset,
		 unsigned num_gpu_pages,
		 struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	if (radeon_fence_need_sync(*fence, ring->idx)) {
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
					    ring->idx);
		radeon_fence_note_sync(*fence, ring->idx);
	} else {
		radeon_semaphore_free(rdev, &sem, NULL);
	}

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, cur_size_in_bytes);
		radeon_ring_write(ring, 0); /* src/dst endian swap */
		radeon_ring_write(ring, src_offset & 0xffffffff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}

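/*
 * Worked example (illustrative): each linear COPY packet above moves at
 * most 0x1fffff bytes (2 MiB - 1), so copying 2048 GPU pages of 4 KiB
 * (8 MiB = 0x800000 bytes) takes DIV_ROUND_UP(0x800000, 0x1fffff) = 5
 * packets of 7 dwords each.  That is why the ring is locked for
 * num_loops * 7 + 14 dwords, the extra 14 leaving headroom for the
 * optional semaphore sync and the fence emission.
 */
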
/**
 * cik_sdma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ring_lock(rdev, ring, 4);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

/**
 * cik_sdma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp = 0;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
	ib.ptr[3] = 1;
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		/* don't leak the IB on a failed fence wait */
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}


static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		 RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		 RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		 RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		 RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}

/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}

/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}

	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}

/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}

/**
 * cik_gfx_is_lockup - check if the 3D engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the 3D engine is locked up (CIK).
 * Returns true if the engine is locked, false if not.
 */
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!(reset_mask & (RADEON_RESET_GFX |
			    RADEON_RESET_COMPUTE |
			    RADEON_RESET_CP))) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cik_sdma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up (CIK).
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	/* Lockout access through VGA aperture */
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}

/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM information */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* CONFIG_MEMSIZE reports the size in MB */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
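
/*
 * Illustrative helper (sketch, not in the original driver): the
 * per-VMID page table base registers are split across two register
 * blocks, contexts 0-7 and contexts 8-15, exactly as programmed in
 * cik_pcie_gart_enable() below.
 */
static inline u32 cik_vm_pt_base_reg(unsigned int vmid)
{
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}
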
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}

/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page tables (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}

5202/**
5203 * cik_pcie_gart_fini - vm fini callback
5204 *
5205 * @rdev: radeon_device pointer
5206 *
5207 * Tears down the driver GART/VM setup (CIK).
5208 */
5209static void cik_pcie_gart_fini(struct radeon_device *rdev)
5210{
5211 cik_pcie_gart_disable(rdev);
5212 radeon_gart_table_vram_free(rdev);
5213 radeon_gart_fini(rdev);
5214}
5215
5216/* vm parser */
5217/**
5218 * cik_ib_parse - vm ib_parse callback
5219 *
5220 * @rdev: radeon_device pointer
5221 * @ib: indirect buffer pointer
5222 *
5223 * CIK uses hw IB checking so this is a nop (CIK).
5224 */
5225int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5226{
5227 return 0;
5228}
5229
5230/*
5231 * vm
5232 * VMID 0 is the physical GPU address space as used by the kernel.
5233 * VMIDs 1-15 are used for userspace clients and are handled
5234 * by the radeon vm/hsa code.
5235 */
5236/**
5237 * cik_vm_init - cik vm init callback
5238 *
5239 * @rdev: radeon_device pointer
5240 *
5241 * Inits cik specific vm parameters (number of VMs, base of vram for
5242 * VMIDs 1-15) (CIK).
5243 * Returns 0 for success.
5244 */
5245int cik_vm_init(struct radeon_device *rdev)
5246{
5247 /* number of VMs */
5248 rdev->vm_manager.nvm = 16;
5249 /* base offset of vram pages */
5250 if (rdev->flags & RADEON_IS_IGP) {
5251 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5252 tmp <<= 22;
5253 rdev->vm_manager.vram_base_offset = tmp;
5254 } else
5255 rdev->vm_manager.vram_base_offset = 0;
5256
5257 return 0;
5258}
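/* Unit note for the IGP path above, sketched as arithmetic: MC_VM_FB_OFFSET
 * holds the framebuffer base in 4 MB granules, so the << 22 converts it to
 * a byte address, e.g. a register value of 0x10 becomes 0x10 << 22 = 64 MB.
 */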
5259
5260/**
5261 * cik_vm_fini - cik vm fini callback
5262 *
5263 * @rdev: radeon_device pointer
5264 *
5265 * Tear down any asic specific VM setup (CIK).
5266 */
5267void cik_vm_fini(struct radeon_device *rdev)
5268{
5269}
5270
Alex Deucherf96ab482012-08-31 10:37:47 -04005271/**
Alex Deucher3ec7d112013-06-14 10:42:22 -04005272 * cik_vm_decode_fault - print human readable fault info
5273 *
5274 * @rdev: radeon_device pointer
5275 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5276 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5277 *
5278 * Print human readable fault information (CIK).
5279 */
5280static void cik_vm_decode_fault(struct radeon_device *rdev,
5281 u32 status, u32 addr, u32 mc_client)
5282{
5283 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5284 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5285 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5286 char *block = (char *)&mc_client;
5287
5288 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5289 protections, vmid, addr,
5290 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5291 block, mc_id);
5292}
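/* Hypothetical helper, sketched here for clarity: mc_client is four packed
 * ASCII characters read straight from the fault register, so a bounded,
 * null-terminated copy is a safer way to hand it to %s than pointing at the
 * raw u32 as done above.
 */
static inline void cik_mc_client_to_str(u32 mc_client, char buf[5])
{
	memcpy(buf, &mc_client, 4);	/* 4 ASCII chars, register byte order */
	buf[4] = '\0';
}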
5293
5294/**
Alex Deucherf96ab482012-08-31 10:37:47 -04005295 * cik_vm_flush - cik vm flush using the CP
5296 *
5297 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
5298 *
5299 * Update the page table base and flush the VM TLB
5300 * using the CP (CIK).
5301 */
5302void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5303{
5304 struct radeon_ring *ring = &rdev->ring[ridx];
5305
5306 if (vm == NULL)
5307 return;
5308
5309 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5310 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5311 WRITE_DATA_DST_SEL(0)));
5312 if (vm->id < 8) {
5313 radeon_ring_write(ring,
5314 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5315 } else {
5316 radeon_ring_write(ring,
5317 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5318 }
5319 radeon_ring_write(ring, 0);
5320 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5321
5322 /* update SH_MEM_* regs */
5323 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5324 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5325 WRITE_DATA_DST_SEL(0)));
5326 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5327 radeon_ring_write(ring, 0);
5328 radeon_ring_write(ring, VMID(vm->id));
5329
5330 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5331 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5332 WRITE_DATA_DST_SEL(0)));
5333 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5334 radeon_ring_write(ring, 0);
5335
5336 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5337 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5338 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5339 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5340
5341 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5342 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5343 WRITE_DATA_DST_SEL(0)));
5344 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5345 radeon_ring_write(ring, 0);
5346 radeon_ring_write(ring, VMID(0));
5347
5348 /* HDP flush */
5349 /* We should be using the WAIT_REG_MEM packet here like in
5350 * cik_fence_ring_emit(), but it causes the CP to hang in this
5351 * context...
5352 */
5353 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5354 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5355 WRITE_DATA_DST_SEL(0)));
5356 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5357 radeon_ring_write(ring, 0);
5358 radeon_ring_write(ring, 0);
5359
5360 /* bits 0-15 are the VM contexts0-15 */
5361 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5362 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5363 WRITE_DATA_DST_SEL(0)));
5364 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5365 radeon_ring_write(ring, 0);
5366 radeon_ring_write(ring, 1 << vm->id);
5367
Alex Deucherb07fdd32013-04-11 09:36:17 -04005368 /* compute doesn't have PFP */
5369 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5370 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5371 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5372 radeon_ring_write(ring, 0x0);
5373 }
Alex Deucherf96ab482012-08-31 10:37:47 -04005374}
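/* A minimal sketch of the WRITE_DATA idiom repeated above (the helper name
 * is hypothetical): a single register write costs five ring dwords (header,
 * control, two address dwords, one value), which is why cik_vm_flush()
 * batches the four SH_MEM_* values into one larger WRITE_DATA packet.
 */
static inline void cik_ring_wreg(struct radeon_ring *ring, u32 reg, u32 val)
{
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |	/* ME engine */
				 WRITE_DATA_DST_SEL(0)));	/* register space */
	radeon_ring_write(ring, reg >> 2);	/* dword offset of the register */
	radeon_ring_write(ring, 0);		/* high address bits, unused for registers */
	radeon_ring_write(ring, val);
}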
5375
Alex Deucher605de6b2012-10-22 13:04:03 -04005376/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04005377 * cik_vm_set_page - update the page tables using CP or sDMA
5378 *
5379 * @rdev: radeon_device pointer
5380 * @ib: indirect buffer to fill with commands
5381 * @pe: addr of the page entry
5382 * @addr: dst addr to write into pe
5383 * @count: number of page entries to update
5384 * @incr: increase next addr by incr bytes
5385 * @flags: access flags
5386 *
5387 * Update the page tables using CP or sDMA (CIK).
5388 */
5389void cik_vm_set_page(struct radeon_device *rdev,
5390 struct radeon_ib *ib,
5391 uint64_t pe,
5392 uint64_t addr, unsigned count,
5393 uint32_t incr, uint32_t flags)
5394{
5395 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
5396 uint64_t value;
5397 unsigned ndw;
5398
5399 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
5400 /* CP */
5401 while (count) {
5402 ndw = 2 + count * 2;
5403 if (ndw > 0x3FFE)
5404 ndw = 0x3FFE;
5405
5406 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
5407 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
5408 WRITE_DATA_DST_SEL(1));
5409 ib->ptr[ib->length_dw++] = pe;
5410 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5411 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
5412 if (flags & RADEON_VM_PAGE_SYSTEM) {
5413 value = radeon_vm_map_gart(rdev, addr);
5414 value &= 0xFFFFFFFFFFFFF000ULL;
5415 } else if (flags & RADEON_VM_PAGE_VALID) {
5416 value = addr;
5417 } else {
5418 value = 0;
5419 }
5420 addr += incr;
5421 value |= r600_flags;
5422 ib->ptr[ib->length_dw++] = value;
5423 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5424 }
5425 }
5426 } else {
5427 /* DMA */
5428 if (flags & RADEON_VM_PAGE_SYSTEM) {
5429 while (count) {
5430 ndw = count * 2;
5431 if (ndw > 0xFFFFE)
5432 ndw = 0xFFFFE;
5433
5434 /* for non-physically contiguous pages (system) */
5435 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
5436 ib->ptr[ib->length_dw++] = pe;
5437 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5438 ib->ptr[ib->length_dw++] = ndw;
5439 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
5440 if (flags & RADEON_VM_PAGE_SYSTEM) {
5441 value = radeon_vm_map_gart(rdev, addr);
5442 value &= 0xFFFFFFFFFFFFF000ULL;
5443 } else if (flags & RADEON_VM_PAGE_VALID) {
5444 value = addr;
5445 } else {
5446 value = 0;
5447 }
5448 addr += incr;
5449 value |= r600_flags;
5450 ib->ptr[ib->length_dw++] = value;
5451 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5452 }
5453 }
5454 } else {
5455 while (count) {
5456 ndw = count;
5457 if (ndw > 0x7FFFF)
5458 ndw = 0x7FFFF;
5459
5460 if (flags & RADEON_VM_PAGE_VALID)
5461 value = addr;
5462 else
5463 value = 0;
5464 /* for physically contiguous pages (vram) */
5465 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
5466 ib->ptr[ib->length_dw++] = pe; /* dst addr */
5467 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5468 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
5469 ib->ptr[ib->length_dw++] = 0;
5470 ib->ptr[ib->length_dw++] = value; /* value */
5471 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5472 ib->ptr[ib->length_dw++] = incr; /* increment size */
5473 ib->ptr[ib->length_dw++] = 0;
5474 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
5475 pe += ndw * 8;
5476 addr += ndw * incr;
5477 count -= ndw;
5478 }
5479 }
5480 while (ib->length_dw & 0x7)
5481 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
5482 }
5483}
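/* Sizing notes for the paths above, as a sketch: the CP path spends two
 * dwords per PTE on top of a small header (hence the 0x3FFE clamp on ndw),
 * the sDMA LINEAR write is similar with a larger 0xFFFFE clamp, and
 * GENERATE_PTE_PDE covers up to 0x7FFFF contiguous entries in one 10-dword
 * packet. The trailing NOP loop pads the IB to a multiple of 8 dwords,
 * presumably an alignment requirement of the sDMA engine.
 */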
5484
5485/**
Alex Deucher605de6b2012-10-22 13:04:03 -04005486 * cik_dma_vm_flush - cik vm flush using sDMA
5487 *
5488 * @rdev: radeon_device pointer
5489 *
5490 * Update the page table base and flush the VM TLB
5491 * using sDMA (CIK).
5492 */
5493void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5494{
5495 struct radeon_ring *ring = &rdev->ring[ridx];
5496 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
5497 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
5498 u32 ref_and_mask;
5499
5500 if (vm == NULL)
5501 return;
5502
5503 if (ridx == R600_RING_TYPE_DMA_INDEX)
5504 ref_and_mask = SDMA0;
5505 else
5506 ref_and_mask = SDMA1;
5507
5508 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5509 if (vm->id < 8) {
5510 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5511 } else {
5512 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5513 }
5514 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5515
5516 /* update SH_MEM_* regs */
5517 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5518 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5519 radeon_ring_write(ring, VMID(vm->id));
5520
5521 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5522 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5523 radeon_ring_write(ring, 0);
5524
5525 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5526 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
5527 radeon_ring_write(ring, 0);
5528
5529 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5530 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
5531 radeon_ring_write(ring, 1);
5532
5533 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5534 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
5535 radeon_ring_write(ring, 0);
5536
5537 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5538 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5539 radeon_ring_write(ring, VMID(0));
5540
5541 /* flush HDP */
5542 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
5543 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
5544 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
5545 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
5546 radeon_ring_write(ring, ref_and_mask); /* MASK */
5547 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
5548
5549 /* flush TLB */
5550 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5551 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5552 radeon_ring_write(ring, 1 << vm->id);
5553}
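/* A minimal sketch of the SRBM_WRITE triplets above (hypothetical helper
 * name): one header with the full 0xf000 byte-enable field, the dword
 * register offset, and the value to write.
 */
static inline void cik_sdma_wreg(struct radeon_ring *ring, u32 reg, u32 val)
{
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, reg >> 2);	/* dword offset of the register */
	radeon_ring_write(ring, val);
}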
5554
Alex Deucherf6796ca2012-11-09 10:44:08 -05005555/*
5556 * RLC
5557 * The RLC is a multi-purpose microengine that handles a
5558 * variety of functions, the most important of which is
5559 * the interrupt controller.
5560 */
Alex Deucher866d83d2013-04-15 17:13:29 -04005561static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5562 bool enable)
Alex Deucherf6796ca2012-11-09 10:44:08 -05005563{
Alex Deucher866d83d2013-04-15 17:13:29 -04005564 u32 tmp = RREG32(CP_INT_CNTL_RING0);
Alex Deucherf6796ca2012-11-09 10:44:08 -05005565
Alex Deucher866d83d2013-04-15 17:13:29 -04005566 if (enable)
5567 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5568 else
5569 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucherf6796ca2012-11-09 10:44:08 -05005570 WREG32(CP_INT_CNTL_RING0, tmp);
Alex Deucher866d83d2013-04-15 17:13:29 -04005571}
Alex Deucherf6796ca2012-11-09 10:44:08 -05005572
Alex Deucher866d83d2013-04-15 17:13:29 -04005573static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5574{
5575 u32 tmp;
Alex Deucherf6796ca2012-11-09 10:44:08 -05005576
Alex Deucher866d83d2013-04-15 17:13:29 -04005577 tmp = RREG32(RLC_LB_CNTL);
5578 if (enable)
5579 tmp |= LOAD_BALANCE_ENABLE;
5580 else
5581 tmp &= ~LOAD_BALANCE_ENABLE;
5582 WREG32(RLC_LB_CNTL, tmp);
5583}
Alex Deucherf6796ca2012-11-09 10:44:08 -05005584
Alex Deucher866d83d2013-04-15 17:13:29 -04005585static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5586{
5587 u32 i, j, k;
5588 u32 mask;
Alex Deucherf6796ca2012-11-09 10:44:08 -05005589
5590 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5591 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5592 cik_select_se_sh(rdev, i, j);
5593 for (k = 0; k < rdev->usec_timeout; k++) {
5594 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5595 break;
5596 udelay(1);
5597 }
5598 }
5599 }
5600 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5601
5602 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5603 for (k = 0; k < rdev->usec_timeout; k++) {
5604 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5605 break;
5606 udelay(1);
5607 }
5608}
5609
Alex Deucher22c775c2013-07-23 09:41:05 -04005610static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5611{
5612 u32 tmp;
5613
5614 tmp = RREG32(RLC_CNTL);
5615 if (tmp != rlc)
5616 WREG32(RLC_CNTL, rlc);
5617}
5618
5619static u32 cik_halt_rlc(struct radeon_device *rdev)
5620{
5621 u32 data, orig;
5622
5623 orig = data = RREG32(RLC_CNTL);
5624
5625 if (data & RLC_ENABLE) {
5626 u32 i;
5627
5628 data &= ~RLC_ENABLE;
5629 WREG32(RLC_CNTL, data);
5630
5631 for (i = 0; i < rdev->usec_timeout; i++) {
5632 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5633 break;
5634 udelay(1);
5635 }
5636
5637 cik_wait_for_rlc_serdes(rdev);
5638 }
5639
5640 return orig;
5641}
5642
Alex Deuchera412fce2013-04-22 20:23:31 -04005643void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5644{
5645 u32 tmp, i, mask;
5646
5647 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5648 WREG32(RLC_GPR_REG2, tmp);
5649
5650 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5651 for (i = 0; i < rdev->usec_timeout; i++) {
5652 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5653 break;
5654 udelay(1);
5655 }
5656
5657 for (i = 0; i < rdev->usec_timeout; i++) {
5658 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5659 break;
5660 udelay(1);
5661 }
5662}
5663
5664void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5665{
5666 u32 tmp;
5667
5668 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5669 WREG32(RLC_GPR_REG2, tmp);
5670}
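/* Typical bracketing of the pair above, sketched as a usage pattern rather
 * than a specific call site: callers park the RLC before touching registers
 * it also owns, then release it.
 *
 *	cik_enter_rlc_safe_mode(rdev);
 *	... update RLC-owned registers ...
 *	cik_exit_rlc_safe_mode(rdev);
 */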
5671
Alex Deucherf6796ca2012-11-09 10:44:08 -05005672/**
Alex Deucher866d83d2013-04-15 17:13:29 -04005673 * cik_rlc_stop - stop the RLC ME
5674 *
5675 * @rdev: radeon_device pointer
5676 *
5677 * Halt the RLC ME (MicroEngine) (CIK).
5678 */
5679static void cik_rlc_stop(struct radeon_device *rdev)
5680{
Alex Deucher22c775c2013-07-23 09:41:05 -04005681 WREG32(RLC_CNTL, 0);
Alex Deucher866d83d2013-04-15 17:13:29 -04005682
5683 cik_enable_gui_idle_interrupt(rdev, false);
5684
Alex Deucher866d83d2013-04-15 17:13:29 -04005685 cik_wait_for_rlc_serdes(rdev);
5686}
5687
5688/**
Alex Deucherf6796ca2012-11-09 10:44:08 -05005689 * cik_rlc_start - start the RLC ME
5690 *
5691 * @rdev: radeon_device pointer
5692 *
5693 * Unhalt the RLC ME (MicroEngine) (CIK).
5694 */
5695static void cik_rlc_start(struct radeon_device *rdev)
5696{
Alex Deucherf6796ca2012-11-09 10:44:08 -05005697 WREG32(RLC_CNTL, RLC_ENABLE);
5698
Alex Deucher866d83d2013-04-15 17:13:29 -04005699 cik_enable_gui_idle_interrupt(rdev, true);
Alex Deucherf6796ca2012-11-09 10:44:08 -05005700
5701 udelay(50);
5702}
5703
5704/**
5705 * cik_rlc_resume - setup the RLC hw
5706 *
5707 * @rdev: radeon_device pointer
5708 *
5709 * Initialize the RLC registers, load the ucode,
5710 * and start the RLC (CIK).
5711 * Returns 0 for success, -EINVAL if the ucode is not available.
5712 */
5713static int cik_rlc_resume(struct radeon_device *rdev)
5714{
Alex Deucher22c775c2013-07-23 09:41:05 -04005715 u32 i, size, tmp;
Alex Deucherf6796ca2012-11-09 10:44:08 -05005716 const __be32 *fw_data;
5717
5718 if (!rdev->rlc_fw)
5719 return -EINVAL;
5720
5721 switch (rdev->family) {
5722 case CHIP_BONAIRE:
5723 default:
5724 size = BONAIRE_RLC_UCODE_SIZE;
5725 break;
5726 case CHIP_KAVERI:
5727 size = KV_RLC_UCODE_SIZE;
5728 break;
5729 case CHIP_KABINI:
5730 size = KB_RLC_UCODE_SIZE;
5731 break;
5732 }
5733
5734 cik_rlc_stop(rdev);
5735
Alex Deucher22c775c2013-07-23 09:41:05 -04005736 /* disable CG */
5737 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5738 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5739
Alex Deucher866d83d2013-04-15 17:13:29 -04005740 si_rlc_reset(rdev);
Alex Deucherf6796ca2012-11-09 10:44:08 -05005741
Alex Deucher22c775c2013-07-23 09:41:05 -04005742 cik_init_pg(rdev);
5743
5744 cik_init_cg(rdev);
5745
Alex Deucherf6796ca2012-11-09 10:44:08 -05005746 WREG32(RLC_LB_CNTR_INIT, 0);
5747 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5748
5749 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5750 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5751 WREG32(RLC_LB_PARAMS, 0x00600408);
5752 WREG32(RLC_LB_CNTL, 0x80000004);
5753
5754 WREG32(RLC_MC_CNTL, 0);
5755 WREG32(RLC_UCODE_CNTL, 0);
5756
5757 fw_data = (const __be32 *)rdev->rlc_fw->data;
5758 WREG32(RLC_GPM_UCODE_ADDR, 0);
5759 for (i = 0; i < size; i++)
5760 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5761 WREG32(RLC_GPM_UCODE_ADDR, 0);
5762
Alex Deucher866d83d2013-04-15 17:13:29 -04005763 /* XXX - find out what chips support lbpw */
5764 cik_enable_lbpw(rdev, false);
5765
Alex Deucher22c775c2013-07-23 09:41:05 -04005766 if (rdev->family == CHIP_BONAIRE)
5767 WREG32(RLC_DRIVER_DMA_STATUS, 0);
Alex Deucherf6796ca2012-11-09 10:44:08 -05005768
5769 cik_rlc_start(rdev);
5770
5771 return 0;
5772}
Alex Deuchera59781b2012-11-09 10:45:57 -05005773
Alex Deucher22c775c2013-07-23 09:41:05 -04005774static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5775{
5776 u32 data, orig, tmp, tmp2;
5777
5778 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5779
5780 cik_enable_gui_idle_interrupt(rdev, enable);
5781
5782 if (enable) {
5783 tmp = cik_halt_rlc(rdev);
5784
5785 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5786 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5787 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5788 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5789 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5790
5791 cik_update_rlc(rdev, tmp);
5792
5793 data |= CGCG_EN | CGLS_EN;
5794 } else {
5795 RREG32(CB_CGTT_SCLK_CTRL);
5796 RREG32(CB_CGTT_SCLK_CTRL);
5797 RREG32(CB_CGTT_SCLK_CTRL);
5798 RREG32(CB_CGTT_SCLK_CTRL);
5799
5800 data &= ~(CGCG_EN | CGLS_EN);
5801 }
5802
5803 if (orig != data)
5804 WREG32(RLC_CGCG_CGLS_CTRL, data);
5806}
5807
5808static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5809{
5810 u32 data, orig, tmp = 0;
5811
5812 if (enable) {
5813 orig = data = RREG32(CP_MEM_SLP_CNTL);
5814 data |= CP_MEM_LS_EN;
5815 if (orig != data)
5816 WREG32(CP_MEM_SLP_CNTL, data);
5817
5818 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5819 data &= 0xfffffffd;
5820 if (orig != data)
5821 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5822
5823 tmp = cik_halt_rlc(rdev);
5824
5825 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5826 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5827 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5828 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5829 WREG32(RLC_SERDES_WR_CTRL, data);
5830
5831 cik_update_rlc(rdev, tmp);
5832
5833 orig = data = RREG32(CGTS_SM_CTRL_REG);
5834 data &= ~SM_MODE_MASK;
5835 data |= SM_MODE(0x2);
5836 data |= SM_MODE_ENABLE;
5837 data &= ~CGTS_OVERRIDE;
5838 data &= ~CGTS_LS_OVERRIDE;
5839 data &= ~ON_MONITOR_ADD_MASK;
5840 data |= ON_MONITOR_ADD_EN;
5841 data |= ON_MONITOR_ADD(0x96);
5842 if (orig != data)
5843 WREG32(CGTS_SM_CTRL_REG, data);
5844 } else {
5845 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5846 data |= 0x00000002;
5847 if (orig != data)
5848 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5849
5850 data = RREG32(RLC_MEM_SLP_CNTL);
5851 if (data & RLC_MEM_LS_EN) {
5852 data &= ~RLC_MEM_LS_EN;
5853 WREG32(RLC_MEM_SLP_CNTL, data);
5854 }
5855
5856 data = RREG32(CP_MEM_SLP_CNTL);
5857 if (data & CP_MEM_LS_EN) {
5858 data &= ~CP_MEM_LS_EN;
5859 WREG32(CP_MEM_SLP_CNTL, data);
5860 }
5861
5862 orig = data = RREG32(CGTS_SM_CTRL_REG);
5863 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5864 if (orig != data)
5865 WREG32(CGTS_SM_CTRL_REG, data);
5866
5867 tmp = cik_halt_rlc(rdev);
5868
5869 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5870 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5871 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5872 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5873 WREG32(RLC_SERDES_WR_CTRL, data);
5874
5875 cik_update_rlc(rdev, tmp);
5876 }
5877}
5878
5879static const u32 mc_cg_registers[] =
5880{
5881 MC_HUB_MISC_HUB_CG,
5882 MC_HUB_MISC_SIP_CG,
5883 MC_HUB_MISC_VM_CG,
5884 MC_XPB_CLK_GAT,
5885 ATC_MISC_CG,
5886 MC_CITF_MISC_WR_CG,
5887 MC_CITF_MISC_RD_CG,
5888 MC_CITF_MISC_VM_CG,
5889 VM_L2_CG,
5890};
5891
5892static void cik_enable_mc_ls(struct radeon_device *rdev,
5893 bool enable)
5894{
5895 int i;
5896 u32 orig, data;
5897
5898 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5899 orig = data = RREG32(mc_cg_registers[i]);
5900 if (enable)
5901 data |= MC_LS_ENABLE;
5902 else
5903 data &= ~MC_LS_ENABLE;
5904 if (data != orig)
5905 WREG32(mc_cg_registers[i], data);
5906 }
5907}
5908
5909static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5910 bool enable)
5911{
5912 int i;
5913 u32 orig, data;
5914
5915 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5916 orig = data = RREG32(mc_cg_registers[i]);
5917 if (enable)
5918 data |= MC_CG_ENABLE;
5919 else
5920 data &= ~MC_CG_ENABLE;
5921 if (data != orig)
5922 WREG32(mc_cg_registers[i], data);
5923 }
5924}
5925
5926static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5927 bool enable)
5928{
5929 u32 orig, data;
5930
5931 if (enable) {
5932 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5933 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5934 } else {
5935 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5936 data |= 0xff000000;
5937 if (data != orig)
5938 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5939
5940 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5941 data |= 0xff000000;
5942 if (data != orig)
5943 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5944 }
5945}
5946
5947static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5948 bool enable)
5949{
5950 u32 orig, data;
5951
5952 if (enable) {
5953 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5954 data |= 0x100;
5955 if (orig != data)
5956 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5957
5958 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5959 data |= 0x100;
5960 if (orig != data)
5961 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5962 } else {
5963 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5964 data &= ~0x100;
5965 if (orig != data)
5966 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5967
5968 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5969 data &= ~0x100;
5970 if (orig != data)
5971 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5972 }
5973}
5974
5975static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5976 bool enable)
5977{
5978 u32 orig, data;
5979
5980 if (enable) {
5981 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5982 data = 0xfff;
5983 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5984
5985 orig = data = RREG32(UVD_CGC_CTRL);
5986 data |= DCM;
5987 if (orig != data)
5988 WREG32(UVD_CGC_CTRL, data);
5989 } else {
5990 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5991 data &= ~0xfff;
5992 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5993
5994 orig = data = RREG32(UVD_CGC_CTRL);
5995 data &= ~DCM;
5996 if (orig != data)
5997 WREG32(UVD_CGC_CTRL, data);
5998 }
5999}
6000
6001static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6002 bool enable)
6003{
6004 u32 orig, data;
6005
6006 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6007
6008 if (enable)
6009 data &= ~CLOCK_GATING_DIS;
6010 else
6011 data |= CLOCK_GATING_DIS;
6012
6013 if (orig != data)
6014 WREG32(HDP_HOST_PATH_CNTL, data);
6015}
6016
6017static void cik_enable_hdp_ls(struct radeon_device *rdev,
6018 bool enable)
6019{
6020 u32 orig, data;
6021
6022 orig = data = RREG32(HDP_MEM_POWER_LS);
6023
6024 if (enable)
6025 data |= HDP_LS_ENABLE;
6026 else
6027 data &= ~HDP_LS_ENABLE;
6028
6029 if (orig != data)
6030 WREG32(HDP_MEM_POWER_LS, data);
6031}
6032
6033void cik_update_cg(struct radeon_device *rdev,
6034 u32 block, bool enable)
6035{
6036 if (block & RADEON_CG_BLOCK_GFX) {
6037 /* order matters! */
6038 if (enable) {
6039 cik_enable_mgcg(rdev, true);
6040 cik_enable_cgcg(rdev, true);
6041 } else {
6042 cik_enable_cgcg(rdev, false);
6043 cik_enable_mgcg(rdev, false);
6044 }
6045 }
6046
6047 if (block & RADEON_CG_BLOCK_MC) {
6048 if (!(rdev->flags & RADEON_IS_IGP)) {
6049 cik_enable_mc_mgcg(rdev, enable);
6050 cik_enable_mc_ls(rdev, enable);
6051 }
6052 }
6053
6054 if (block & RADEON_CG_BLOCK_SDMA) {
6055 cik_enable_sdma_mgcg(rdev, enable);
6056 cik_enable_sdma_mgls(rdev, enable);
6057 }
6058
6059 if (block & RADEON_CG_BLOCK_UVD) {
6060 if (rdev->has_uvd)
6061 cik_enable_uvd_mgcg(rdev, enable);
6062 }
6063
6064 if (block & RADEON_CG_BLOCK_HDP) {
6065 cik_enable_hdp_mgcg(rdev, enable);
6066 cik_enable_hdp_ls(rdev, enable);
6067 }
6068}
6069
6070static void cik_init_cg(struct radeon_device *rdev)
6071{
6073 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */
6074
6075 if (rdev->has_uvd)
6076 si_init_uvd_internal_cg(rdev);
6077
6078 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6079 RADEON_CG_BLOCK_SDMA |
6080 RADEON_CG_BLOCK_UVD |
6081 RADEON_CG_BLOCK_HDP), true);
6082}
6083
6084static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6085 bool enable)
6086{
6087 u32 data, orig;
6088
6089 orig = data = RREG32(RLC_PG_CNTL);
6090 if (enable)
6091 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6092 else
6093 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6094 if (orig != data)
6095 WREG32(RLC_PG_CNTL, data);
6096}
6097
6098static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6099 bool enable)
6100{
6101 u32 data, orig;
6102
6103 orig = data = RREG32(RLC_PG_CNTL);
6104 if (enable)
6105 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6106 else
6107 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6108 if (orig != data)
6109 WREG32(RLC_PG_CNTL, data);
6110}
6111
6112static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6113{
6114 u32 data, orig;
6115
6116 orig = data = RREG32(RLC_PG_CNTL);
6117 if (enable)
6118 data &= ~DISABLE_CP_PG;
6119 else
6120 data |= DISABLE_CP_PG;
6121 if (orig != data)
6122 WREG32(RLC_PG_CNTL, data);
6123}
6124
6125static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6126{
6127 u32 data, orig;
6128
6129 orig = data = RREG32(RLC_PG_CNTL);
6130 if (enable)
6131 data &= ~DISABLE_GDS_PG;
6132 else
6133 data |= DISABLE_GDS_PG;
6134 if (orig != data)
6135 WREG32(RLC_PG_CNTL, data);
6136}
6137
6138#define CP_ME_TABLE_SIZE 96
6139#define CP_ME_TABLE_OFFSET 2048
6140#define CP_MEC_TABLE_OFFSET 4096
6141
6142void cik_init_cp_pg_table(struct radeon_device *rdev)
6143{
6144 const __be32 *fw_data;
6145 volatile u32 *dst_ptr;
6146 int me, i, max_me = 4;
6147 u32 bo_offset = 0;
6148 u32 table_offset;
6149
6150 if (rdev->family == CHIP_KAVERI)
6151 max_me = 5;
6152
6153 if (rdev->rlc.cp_table_ptr == NULL)
6154 return;
6155
6156 /* write the cp table buffer */
6157 dst_ptr = rdev->rlc.cp_table_ptr;
6158 for (me = 0; me < max_me; me++) {
6159 if (me == 0) {
6160 fw_data = (const __be32 *)rdev->ce_fw->data;
6161 table_offset = CP_ME_TABLE_OFFSET;
6162 } else if (me == 1) {
6163 fw_data = (const __be32 *)rdev->pfp_fw->data;
6164 table_offset = CP_ME_TABLE_OFFSET;
6165 } else if (me == 2) {
6166 fw_data = (const __be32 *)rdev->me_fw->data;
6167 table_offset = CP_ME_TABLE_OFFSET;
6168 } else {
6169 fw_data = (const __be32 *)rdev->mec_fw->data;
6170 table_offset = CP_MEC_TABLE_OFFSET;
6171 }
6172
6173 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6174 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
6175 }
6176 bo_offset += CP_ME_TABLE_SIZE;
6177 }
6178}
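/* Layout sketch for the table written above: each micro engine contributes
 * CP_ME_TABLE_SIZE consecutive dwords, so the backing buffer needs at least
 * max_me * CP_ME_TABLE_SIZE * 4 bytes (5 * 96 * 4 = 1920 bytes on Kaveri).
 * bo_offset and table_offset are both dword indices, since dst_ptr and
 * fw_data are 32-bit pointers.
 */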
6179
6180static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6181 bool enable)
6182{
6183 u32 data, orig;
6184
6185 if (enable) {
6186 orig = data = RREG32(RLC_PG_CNTL);
6187 data |= GFX_PG_ENABLE;
6188 if (orig != data)
6189 WREG32(RLC_PG_CNTL, data);
6190
6191 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6192 data |= AUTO_PG_EN;
6193 if (orig != data)
6194 WREG32(RLC_AUTO_PG_CTRL, data);
6195 } else {
6196 orig = data = RREG32(RLC_PG_CNTL);
6197 data &= ~GFX_PG_ENABLE;
6198 if (orig != data)
6199 WREG32(RLC_PG_CNTL, data);
6200
6201 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6202 data &= ~AUTO_PG_EN;
6203 if (orig != data)
6204 WREG32(RLC_AUTO_PG_CTRL, data);
6205
6206 data = RREG32(DB_RENDER_CONTROL);
6207 }
6208}
6209
6210static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6211{
6212 u32 mask = 0, tmp, tmp1;
6213 int i;
6214
6215 cik_select_se_sh(rdev, se, sh);
6216 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6217 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6218 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6219
6220 tmp &= 0xffff0000;
6221
6222 tmp |= tmp1;
6223 tmp >>= 16;
6224
6225 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6226 mask <<= 1;
6227 mask |= 1;
6228 }
6229
6230 return (~tmp) & mask;
6231}
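/* Worked example for the bitmap math above, assuming max_cu_per_sh = 8:
 * the loop builds mask = 0xff, the disabled-CU fields sit in the upper 16
 * bits of the two config registers, so after the shift (~tmp) & mask
 * inverts them into an active-CU map, e.g. tmp = 0x03 (CUs 0 and 1
 * disabled) yields 0xfc (CUs 2-7 active).
 */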
6232
6233static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6234{
6235 u32 i, j, k, active_cu_number = 0;
6236 u32 mask, counter, cu_bitmap;
6237 u32 tmp = 0;
6238
6239 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6240 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6241 mask = 1;
6242 cu_bitmap = 0;
6243 counter = 0;
6244 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6245 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6246 if (counter < 2)
6247 cu_bitmap |= mask;
6248 counter ++;
6249 }
6250 mask <<= 1;
6251 }
6252
6253 active_cu_number += counter;
6254 tmp |= (cu_bitmap << (i * 16 + j * 8));
6255 }
6256 }
6257
6258 WREG32(RLC_PG_AO_CU_MASK, tmp);
6259
6260 tmp = RREG32(RLC_MAX_PG_CU);
6261 tmp &= ~MAX_PU_CU_MASK;
6262 tmp |= MAX_PU_CU(active_cu_number);
6263 WREG32(RLC_MAX_PG_CU, tmp);
6264}
6265
6266static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6267 bool enable)
6268{
6269 u32 data, orig;
6270
6271 orig = data = RREG32(RLC_PG_CNTL);
6272 if (enable)
6273 data |= STATIC_PER_CU_PG_ENABLE;
6274 else
6275 data &= ~STATIC_PER_CU_PG_ENABLE;
6276 if (orig != data)
6277 WREG32(RLC_PG_CNTL, data);
6278}
6279
6280static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6281 bool enable)
6282{
6283 u32 data, orig;
6284
6285 orig = data = RREG32(RLC_PG_CNTL);
6286 if (enable)
6287 data |= DYN_PER_CU_PG_ENABLE;
6288 else
6289 data &= ~DYN_PER_CU_PG_ENABLE;
6290 if (orig != data)
6291 WREG32(RLC_PG_CNTL, data);
6292}
6293
6294#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6295#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6296
6297static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6298{
6299 u32 data, orig;
6300 u32 i;
6301
6302 if (rdev->rlc.cs_data) {
6303 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6304 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6305 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
6306 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6307 } else {
6308 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6309 for (i = 0; i < 3; i++)
6310 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6311 }
6312 if (rdev->rlc.reg_list) {
6313 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6314 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6315 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6316 }
6317
6318 orig = data = RREG32(RLC_PG_CNTL);
6319 data |= GFX_PG_SRC;
6320 if (orig != data)
6321 WREG32(RLC_PG_CNTL, data);
6322
6323 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6324 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6325
6326 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6327 data &= ~IDLE_POLL_COUNT_MASK;
6328 data |= IDLE_POLL_COUNT(0x60);
6329 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6330
6331 data = 0x10101010;
6332 WREG32(RLC_PG_DELAY, data);
6333
6334 data = RREG32(RLC_PG_DELAY_2);
6335 data &= ~0xff;
6336 data |= 0x3;
6337 WREG32(RLC_PG_DELAY_2, data);
6338
6339 data = RREG32(RLC_AUTO_PG_CTRL);
6340 data &= ~GRBM_REG_SGIT_MASK;
6341 data |= GRBM_REG_SGIT(0x700);
6342 WREG32(RLC_AUTO_PG_CTRL, data);
6344}
6345
6346static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6347{
6348 bool has_pg = false;
6349 bool has_dyn_mgpg = false;
6350 bool has_static_mgpg = false;
6351
6352 /* only APUs have PG */
6353 if (rdev->flags & RADEON_IS_IGP) {
6354 has_pg = true;
6355 has_static_mgpg = true;
6356 if (rdev->family == CHIP_KAVERI)
6357 has_dyn_mgpg = true;
6358 }
6359
6360 if (has_pg) {
6361 cik_enable_gfx_cgpg(rdev, enable);
6362 if (enable) {
6363 cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
6364 cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
6365 } else {
6366 cik_enable_gfx_static_mgpg(rdev, false);
6367 cik_enable_gfx_dynamic_mgpg(rdev, false);
6368 }
6369 }
6371}
6372
6373void cik_init_pg(struct radeon_device *rdev)
6374{
6375 bool has_pg = false;
6376
6377 /* only APUs have PG */
6378 if (rdev->flags & RADEON_IS_IGP) {
6379 /* XXX disable this for now */
6380 /* has_pg = true; */
6381 }
6382
6383 if (has_pg) {
6384 cik_enable_sck_slowdown_on_pu(rdev, true);
6385 cik_enable_sck_slowdown_on_pd(rdev, true);
6386 cik_init_gfx_cgpg(rdev);
6387 cik_enable_cp_pg(rdev, true);
6388 cik_enable_gds_pg(rdev, true);
6389 cik_init_ao_cu_mask(rdev);
6390 cik_update_gfx_pg(rdev, true);
6391 }
6392}
6393
Alex Deuchera59781b2012-11-09 10:45:57 -05006394/*
6395 * Interrupts
6396 * Starting with r6xx, interrupts are handled via a ring buffer.
6397 * Ring buffers are areas of GPU accessible memory that the GPU
6398 * writes interrupt vectors into and the host reads vectors out of.
6399 * There is a rptr (read pointer) that determines where the
6400 * host is currently reading, and a wptr (write pointer)
6401 * which determines where the GPU has written. When the
6402 * pointers are equal, the ring is idle. When the GPU
6403 * writes vectors to the ring buffer, it increments the
6404 * wptr. When there is an interrupt, the host then starts
6405 * fetching commands and processing them until the pointers are
6406 * equal again at which point it updates the rptr.
6407 */
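/* A minimal sketch of that consumption loop, assuming 16-byte vectors and a
 * power-of-two ring; cik_irq_process() below is the real implementation and
 * process_vector() is only a placeholder:
 *
 *	u32 rptr = rdev->ih.rptr;
 *	u32 wptr = cik_get_ih_wptr(rdev);
 *
 *	while (rptr != wptr) {
 *		process_vector(&rdev->ih.ring[rptr / 4]);
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	rdev->ih.rptr = rptr;
 *	WREG32(IH_RB_RPTR, rptr);
 */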
6408
6409/**
6410 * cik_enable_interrupts - Enable the interrupt ring buffer
6411 *
6412 * @rdev: radeon_device pointer
6413 *
6414 * Enable the interrupt ring buffer (CIK).
6415 */
6416static void cik_enable_interrupts(struct radeon_device *rdev)
6417{
6418 u32 ih_cntl = RREG32(IH_CNTL);
6419 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6420
6421 ih_cntl |= ENABLE_INTR;
6422 ih_rb_cntl |= IH_RB_ENABLE;
6423 WREG32(IH_CNTL, ih_cntl);
6424 WREG32(IH_RB_CNTL, ih_rb_cntl);
6425 rdev->ih.enabled = true;
6426}
6427
6428/**
6429 * cik_disable_interrupts - Disable the interrupt ring buffer
6430 *
6431 * @rdev: radeon_device pointer
6432 *
6433 * Disable the interrupt ring buffer (CIK).
6434 */
6435static void cik_disable_interrupts(struct radeon_device *rdev)
6436{
6437 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6438 u32 ih_cntl = RREG32(IH_CNTL);
6439
6440 ih_rb_cntl &= ~IH_RB_ENABLE;
6441 ih_cntl &= ~ENABLE_INTR;
6442 WREG32(IH_RB_CNTL, ih_rb_cntl);
6443 WREG32(IH_CNTL, ih_cntl);
6444 /* set rptr, wptr to 0 */
6445 WREG32(IH_RB_RPTR, 0);
6446 WREG32(IH_RB_WPTR, 0);
6447 rdev->ih.enabled = false;
6448 rdev->ih.rptr = 0;
6449}
6450
6451/**
6452 * cik_disable_interrupt_state - Disable all interrupt sources
6453 *
6454 * @rdev: radeon_device pointer
6455 *
6456 * Clear all interrupt enable bits used by the driver (CIK).
6457 */
6458static void cik_disable_interrupt_state(struct radeon_device *rdev)
6459{
6460 u32 tmp;
6461
6462 /* gfx ring */
6463 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04006464 /* sdma */
6465 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6466 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6467 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6468 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05006469 /* compute queues */
6470 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6471 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6472 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6473 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6474 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6475 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6476 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6477 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6478 /* grbm */
6479 WREG32(GRBM_INT_CNTL, 0);
6480 /* vline/vblank, etc. */
6481 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6482 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6483 if (rdev->num_crtc >= 4) {
6484 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6485 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6486 }
6487 if (rdev->num_crtc >= 6) {
6488 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6489 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6490 }
6491
6492 /* dac hotplug */
6493 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6494
6495 /* digital hotplug */
6496 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6497 WREG32(DC_HPD1_INT_CONTROL, tmp);
6498 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6499 WREG32(DC_HPD2_INT_CONTROL, tmp);
6500 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6501 WREG32(DC_HPD3_INT_CONTROL, tmp);
6502 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6503 WREG32(DC_HPD4_INT_CONTROL, tmp);
6504 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6505 WREG32(DC_HPD5_INT_CONTROL, tmp);
6506 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6507 WREG32(DC_HPD6_INT_CONTROL, tmp);
6509}
6510
6511/**
6512 * cik_irq_init - init and enable the interrupt ring
6513 *
6514 * @rdev: radeon_device pointer
6515 *
6516 * Allocate a ring buffer for the interrupt controller,
6517 * enable the RLC, disable interrupts, enable the IH
6518 * ring buffer and enable it (CIK).
6519 * Called at device load and reume.
6520 * Returns 0 for success, errors for failure.
6521 */
6522static int cik_irq_init(struct radeon_device *rdev)
6523{
6524 int ret = 0;
6525 int rb_bufsz;
6526 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6527
6528 /* allocate ring */
6529 ret = r600_ih_ring_alloc(rdev);
6530 if (ret)
6531 return ret;
6532
6533 /* disable irqs */
6534 cik_disable_interrupts(rdev);
6535
6536 /* init rlc */
6537 ret = cik_rlc_resume(rdev);
6538 if (ret) {
6539 r600_ih_ring_fini(rdev);
6540 return ret;
6541 }
6542
6543 /* setup interrupt control */
6544 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6545 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6546 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6547 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6548 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6549 */
6550 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6551 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6552 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6553 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6554
6555 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6556 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
6557
6558 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6559 IH_WPTR_OVERFLOW_CLEAR |
6560 (rb_bufsz << 1));
6561
6562 if (rdev->wb.enabled)
6563 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6564
6565 /* set the writeback address whether it's enabled or not */
6566 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6567 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6568
6569 WREG32(IH_RB_CNTL, ih_rb_cntl);
6570
6571 /* set rptr, wptr to 0 */
6572 WREG32(IH_RB_RPTR, 0);
6573 WREG32(IH_RB_WPTR, 0);
6574
6575 /* Default settings for IH_CNTL (disabled at first) */
6576 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6577 /* RPTR_REARM only works if msi's are enabled */
6578 if (rdev->msi_enabled)
6579 ih_cntl |= RPTR_REARM;
6580 WREG32(IH_CNTL, ih_cntl);
6581
6582 /* force the active interrupt state to all disabled */
6583 cik_disable_interrupt_state(rdev);
6584
6585 pci_set_master(rdev->pdev);
6586
6587 /* enable irqs */
6588 cik_enable_interrupts(rdev);
6589
6590 return ret;
6591}
6592
6593/**
6594 * cik_irq_set - enable/disable interrupt sources
6595 *
6596 * @rdev: radeon_device pointer
6597 *
6598 * Enable interrupt sources on the GPU (vblanks, hpd,
6599 * etc.) (CIK).
6600 * Returns 0 for success, errors for failure.
6601 */
6602int cik_irq_set(struct radeon_device *rdev)
6603{
6604 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6605 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04006606 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6607 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05006608 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6609 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6610 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04006611 u32 dma_cntl, dma_cntl1;
Alex Deucher41a524a2013-08-14 01:01:40 -04006612 u32 thermal_int;
Alex Deuchera59781b2012-11-09 10:45:57 -05006613
6614 if (!rdev->irq.installed) {
6615 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6616 return -EINVAL;
6617 }
6618 /* don't enable anything if the ih is disabled */
6619 if (!rdev->ih.enabled) {
6620 cik_disable_interrupts(rdev);
6621 /* force the active interrupt state to all disabled */
6622 cik_disable_interrupt_state(rdev);
6623 return 0;
6624 }
6625
6626 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6627 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6628 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6629 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6630 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6631 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6632
Alex Deucher21a93e12013-04-09 12:47:11 -04006633 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6634 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6635
Alex Deucher2b0781a2013-04-09 14:26:16 -04006636 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6637 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6638 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6639 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6640 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6641 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6642 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6643 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6644
Alex Deuchercc8dbbb2013-08-14 01:03:41 -04006645 if (rdev->flags & RADEON_IS_IGP)
6646 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6647 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6648 else
6649 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6650 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
Alex Deucher41a524a2013-08-14 01:01:40 -04006651
Alex Deuchera59781b2012-11-09 10:45:57 -05006652 /* enable CP interrupts on all rings */
6653 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6654 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6655 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6656 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04006657 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6658 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6659 DRM_DEBUG("si_irq_set: sw int cp1\n");
6660 if (ring->me == 1) {
6661 switch (ring->pipe) {
6662 case 0:
6663 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6664 break;
6665 case 1:
6666 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6667 break;
6668 case 2:
6669 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6670 break;
6671 case 3:
6672 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6673 break;
6674 default:
6675 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6676 break;
6677 }
6678 } else if (ring->me == 2) {
6679 switch (ring->pipe) {
6680 case 0:
6681 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6682 break;
6683 case 1:
6684 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6685 break;
6686 case 2:
6687 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6688 break;
6689 case 3:
6690 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6691 break;
6692 default:
6693 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6694 break;
6695 }
6696 } else {
6697 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6698 }
6699 }
6700 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6701 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6702 DRM_DEBUG("cik_irq_set: sw int cp2\n");
6703 if (ring->me == 1) {
6704 switch (ring->pipe) {
6705 case 0:
6706 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6707 break;
6708 case 1:
6709 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6710 break;
6711 case 2:
6712 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6713 break;
6714 case 3:
6715 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6716 break;
6717 default:
6718 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6719 break;
6720 }
6721 } else if (ring->me == 2) {
6722 switch (ring->pipe) {
6723 case 0:
6724 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6725 break;
6726 case 1:
6727 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6728 break;
6729 case 2:
6730 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6731 break;
6732 case 3:
6733 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6734 break;
6735 default:
6736 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6737 break;
6738 }
6739 } else {
6740 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6741 }
6742 }
Alex Deuchera59781b2012-11-09 10:45:57 -05006743
Alex Deucher21a93e12013-04-09 12:47:11 -04006744 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6745 DRM_DEBUG("cik_irq_set: sw int dma\n");
6746 dma_cntl |= TRAP_ENABLE;
6747 }
6748
6749 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6750 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6751 dma_cntl1 |= TRAP_ENABLE;
6752 }
6753
Alex Deuchera59781b2012-11-09 10:45:57 -05006754 if (rdev->irq.crtc_vblank_int[0] ||
6755 atomic_read(&rdev->irq.pflip[0])) {
6756 DRM_DEBUG("cik_irq_set: vblank 0\n");
6757 crtc1 |= VBLANK_INTERRUPT_MASK;
6758 }
6759 if (rdev->irq.crtc_vblank_int[1] ||
6760 atomic_read(&rdev->irq.pflip[1])) {
6761 DRM_DEBUG("cik_irq_set: vblank 1\n");
6762 crtc2 |= VBLANK_INTERRUPT_MASK;
6763 }
6764 if (rdev->irq.crtc_vblank_int[2] ||
6765 atomic_read(&rdev->irq.pflip[2])) {
6766 DRM_DEBUG("cik_irq_set: vblank 2\n");
6767 crtc3 |= VBLANK_INTERRUPT_MASK;
6768 }
6769 if (rdev->irq.crtc_vblank_int[3] ||
6770 atomic_read(&rdev->irq.pflip[3])) {
6771 DRM_DEBUG("cik_irq_set: vblank 3\n");
6772 crtc4 |= VBLANK_INTERRUPT_MASK;
6773 }
6774 if (rdev->irq.crtc_vblank_int[4] ||
6775 atomic_read(&rdev->irq.pflip[4])) {
6776 DRM_DEBUG("cik_irq_set: vblank 4\n");
6777 crtc5 |= VBLANK_INTERRUPT_MASK;
6778 }
6779 if (rdev->irq.crtc_vblank_int[5] ||
6780 atomic_read(&rdev->irq.pflip[5])) {
6781 DRM_DEBUG("cik_irq_set: vblank 5\n");
6782 crtc6 |= VBLANK_INTERRUPT_MASK;
6783 }
6784 if (rdev->irq.hpd[0]) {
6785 DRM_DEBUG("cik_irq_set: hpd 1\n");
6786 hpd1 |= DC_HPDx_INT_EN;
6787 }
6788 if (rdev->irq.hpd[1]) {
6789 DRM_DEBUG("cik_irq_set: hpd 2\n");
6790 hpd2 |= DC_HPDx_INT_EN;
6791 }
6792 if (rdev->irq.hpd[2]) {
6793 DRM_DEBUG("cik_irq_set: hpd 3\n");
6794 hpd3 |= DC_HPDx_INT_EN;
6795 }
6796 if (rdev->irq.hpd[3]) {
6797 DRM_DEBUG("cik_irq_set: hpd 4\n");
6798 hpd4 |= DC_HPDx_INT_EN;
6799 }
6800 if (rdev->irq.hpd[4]) {
6801 DRM_DEBUG("cik_irq_set: hpd 5\n");
6802 hpd5 |= DC_HPDx_INT_EN;
6803 }
6804 if (rdev->irq.hpd[5]) {
6805 DRM_DEBUG("cik_irq_set: hpd 6\n");
6806 hpd6 |= DC_HPDx_INT_EN;
6807 }
6808
Alex Deucher41a524a2013-08-14 01:01:40 -04006809 if (rdev->irq.dpm_thermal) {
6810 DRM_DEBUG("dpm thermal\n");
Alex Deuchercc8dbbb2013-08-14 01:03:41 -04006811 if (rdev->flags & RADEON_IS_IGP)
6812 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6813 else
6814 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
Alex Deucher41a524a2013-08-14 01:01:40 -04006815 }
6816
Alex Deuchera59781b2012-11-09 10:45:57 -05006817 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6818
Alex Deucher21a93e12013-04-09 12:47:11 -04006819 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6820 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6821
Alex Deucher2b0781a2013-04-09 14:26:16 -04006822 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6823 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6824 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6825 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6826 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6827 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6828 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6829 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6830
Alex Deuchera59781b2012-11-09 10:45:57 -05006831 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6832
6833 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6834 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6835 if (rdev->num_crtc >= 4) {
6836 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6837 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6838 }
6839 if (rdev->num_crtc >= 6) {
6840 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6841 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6842 }
6843
6844 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6845 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6846 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6847 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6848 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6849 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6850
Alex Deuchercc8dbbb2013-08-14 01:03:41 -04006851 if (rdev->flags & RADEON_IS_IGP)
6852 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6853 else
6854 WREG32_SMC(CG_THERMAL_INT, thermal_int);
Alex Deucher41a524a2013-08-14 01:01:40 -04006855
Alex Deuchera59781b2012-11-09 10:45:57 -05006856 return 0;
6857}
6858
6859/**
6860 * cik_irq_ack - ack interrupt sources
6861 *
6862 * @rdev: radeon_device pointer
6863 *
6864 * Ack interrupt sources on the GPU (vblanks, hpd,
6865 * etc.) (CIK). Certain interrupt sources are sw
6866 * generated and do not require an explicit ack.
6867 */
6868static inline void cik_irq_ack(struct radeon_device *rdev)
6869{
6870 u32 tmp;
6871
6872 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6873 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6874 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6875 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6876 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6877 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6878 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6879
6880 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6881 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6882 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6883 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6884 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6885 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6886 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6887 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6888
6889 if (rdev->num_crtc >= 4) {
6890 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6891 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6892 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6893 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6894 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6895 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6896 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6897 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6898 }
6899
6900 if (rdev->num_crtc >= 6) {
6901 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6902 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6903 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6904 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6905 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6906 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6907 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6908 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6909 }
6910
6911 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6912 tmp = RREG32(DC_HPD1_INT_CONTROL);
6913 tmp |= DC_HPDx_INT_ACK;
6914 WREG32(DC_HPD1_INT_CONTROL, tmp);
6915 }
6916 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6917 tmp = RREG32(DC_HPD2_INT_CONTROL);
6918 tmp |= DC_HPDx_INT_ACK;
6919 WREG32(DC_HPD2_INT_CONTROL, tmp);
6920 }
6921 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6922 tmp = RREG32(DC_HPD3_INT_CONTROL);
6923 tmp |= DC_HPDx_INT_ACK;
6924 WREG32(DC_HPD3_INT_CONTROL, tmp);
6925 }
6926 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6927 tmp = RREG32(DC_HPD4_INT_CONTROL);
6928 tmp |= DC_HPDx_INT_ACK;
6929 WREG32(DC_HPD4_INT_CONTROL, tmp);
6930 }
6931 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6932 tmp = RREG32(DC_HPD5_INT_CONTROL);
6933 tmp |= DC_HPDx_INT_ACK;
6934 WREG32(DC_HPD5_INT_CONTROL, tmp);
6935 }
6936 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6937 tmp = RREG32(DC_HPD6_INT_CONTROL);
6938 tmp |= DC_HPDx_INT_ACK;
6939 WREG32(DC_HPD6_INT_CONTROL, tmp);
6940 }
6941}
6942
6943/**
6944 * cik_irq_disable - disable interrupts
6945 *
6946 * @rdev: radeon_device pointer
6947 *
6948 * Disable interrupts on the hw (CIK).
6949 */
6950static void cik_irq_disable(struct radeon_device *rdev)
6951{
6952 cik_disable_interrupts(rdev);
6953 /* Wait and acknowledge irq */
6954 mdelay(1);
6955 cik_irq_ack(rdev);
6956 cik_disable_interrupt_state(rdev);
6957}
6958
6959/**
6960 * cik_irq_suspend - disable interrupts for suspend
6961 *
6962 * @rdev: radeon_device pointer
6963 *
6964 * Disable interrupts and stop the RLC (CIK).
6965 * Used for suspend.
6966 */
6967static void cik_irq_suspend(struct radeon_device *rdev)
6968{
6969 cik_irq_disable(rdev);
6970 cik_rlc_stop(rdev);
6971}
6972
6973/**
6974 * cik_irq_fini - tear down interrupt support
6975 *
6976 * @rdev: radeon_device pointer
6977 *
6978 * Disable interrupts on the hw and free the IH ring
6979 * buffer (CIK).
6980 * Used for driver unload.
6981 */
6982static void cik_irq_fini(struct radeon_device *rdev)
6983{
6984 cik_irq_suspend(rdev);
6985 r600_ih_ring_fini(rdev);
6986}
6987
6988/**
6989 * cik_get_ih_wptr - get the IH ring buffer wptr
6990 *
6991 * @rdev: radeon_device pointer
6992 *
6993 * Get the IH ring buffer wptr from either the register
6994 * or the writeback memory buffer (CIK). Also check for
6995 * ring buffer overflow and deal with it.
6996 * Used by cik_irq_process().
6997 * Returns the value of the wptr.
6998 */
6999static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7000{
7001 u32 wptr, tmp;
7002
7003 if (rdev->wb.enabled)
7004 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7005 else
7006 wptr = RREG32(IH_RB_WPTR);
7007
7008 if (wptr & RB_OVERFLOW) {
7009 /* When a ring buffer overflow happens, start parsing interrupts
7010 * from the last not-overwritten vector (wptr + 16). Hopefully
7011 * this should allow us to catch up.
7012 */
7013 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7014 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7015 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7016 tmp = RREG32(IH_RB_CNTL);
7017 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7018 WREG32(IH_RB_CNTL, tmp);
7019 }
7020 return (wptr & rdev->ih.ptr_mask);
7021}
7022
7023/* CIK IV Ring
7024 * Each IV ring entry is 128 bits:
7025 * [7:0] - interrupt source id
7026 * [31:8] - reserved
7027 * [59:32] - interrupt source data
7028 * [63:60] - reserved
7029 * [71:64] - RINGID
7030 * CP:
7031 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7032 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7033 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7034 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7035 * PIPE_ID - ME0 0=3D
7036 * - ME1&2 compute dispatcher (4 pipes each)
7037 * SDMA:
7038 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7039 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7040 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7041 * [79:72] - VMID
7042 * [95:80] - PASID
7043 * [127:96] - reserved
7044 */
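
/* Illustrative sketch only (not part of the driver): decoding the RINGID
 * byte of an IV entry per the layout documented above. The masks mirror
 * the ones cik_irq_process() applies inline below.
 */
static inline void cik_iv_decode_ringid(u32 ring_id, bool is_sdma,
					u8 *me_id, u8 *pipe_id, u8 *queue_id)
{
	if (is_sdma) {
		*me_id = ring_id & 0x3;		/* INSTANCE_ID: 0 = sdma0, 1 = sdma1 */
		*pipe_id = 0;			/* SDMA entries carry no pipe field */
		*queue_id = (ring_id & 0xc) >> 2; /* 0 = gfx, 1 = rlc0, 2 = rlc1 */
	} else {
		*me_id = (ring_id & 0x60) >> 5;	/* 0 = gfx, 1/2 = compute MECs */
		*pipe_id = (ring_id & 0x18) >> 3; /* pipe within the ME */
		*queue_id = (ring_id & 0x7) >> 0; /* queue within the pipe */
	}
}
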
7045/**
7046 * cik_irq_process - interrupt handler
7047 *
7048 * @rdev: radeon_device pointer
7049 *
7050 * Interrupt handler (CIK). Walk the IH ring,
7051 * ack interrupts and schedule work to handle
7052 * interrupt events.
7053 * Returns irq process return code.
7054 */
7055int cik_irq_process(struct radeon_device *rdev)
7056{
Alex Deucher2b0781a2013-04-09 14:26:16 -04007057 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7058 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05007059 u32 wptr;
7060 u32 rptr;
7061 u32 src_id, src_data, ring_id;
7062 u8 me_id, pipe_id, queue_id;
7063 u32 ring_index;
7064 bool queue_hotplug = false;
7065 bool queue_reset = false;
Alex Deucher3ec7d112013-06-14 10:42:22 -04007066 u32 addr, status, mc_client;
Alex Deucher41a524a2013-08-14 01:01:40 -04007067 bool queue_thermal = false;
Alex Deuchera59781b2012-11-09 10:45:57 -05007068
7069 if (!rdev->ih.enabled || rdev->shutdown)
7070 return IRQ_NONE;
7071
7072 wptr = cik_get_ih_wptr(rdev);
7073
7074restart_ih:
7075 /* is somebody else already processing irqs? */
7076 if (atomic_xchg(&rdev->ih.lock, 1))
7077 return IRQ_NONE;
7078
7079 rptr = rdev->ih.rptr;
7080 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7081
7082 /* Order reading of wptr vs. reading of IH ring data */
7083 rmb();
7084
7085 /* display interrupts */
7086 cik_irq_ack(rdev);
7087
7088 while (rptr != wptr) {
7089 /* wptr/rptr are in bytes! */
7090 ring_index = rptr / 4;
7091 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7092 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7093 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05007094
7095 switch (src_id) {
7096 case 1: /* D1 vblank/vline */
7097 switch (src_data) {
7098 case 0: /* D1 vblank */
7099 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7100 if (rdev->irq.crtc_vblank_int[0]) {
7101 drm_handle_vblank(rdev->ddev, 0);
7102 rdev->pm.vblank_sync = true;
7103 wake_up(&rdev->irq.vblank_queue);
7104 }
7105 if (atomic_read(&rdev->irq.pflip[0]))
7106 radeon_crtc_handle_flip(rdev, 0);
7107 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7108 DRM_DEBUG("IH: D1 vblank\n");
7109 }
7110 break;
7111 case 1: /* D1 vline */
7112 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7113 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7114 DRM_DEBUG("IH: D1 vline\n");
7115 }
7116 break;
7117 default:
7118 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7119 break;
7120 }
7121 break;
7122 case 2: /* D2 vblank/vline */
7123 switch (src_data) {
7124 case 0: /* D2 vblank */
7125 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7126 if (rdev->irq.crtc_vblank_int[1]) {
7127 drm_handle_vblank(rdev->ddev, 1);
7128 rdev->pm.vblank_sync = true;
7129 wake_up(&rdev->irq.vblank_queue);
7130 }
7131 if (atomic_read(&rdev->irq.pflip[1]))
7132 radeon_crtc_handle_flip(rdev, 1);
7133 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7134 DRM_DEBUG("IH: D2 vblank\n");
7135 }
7136 break;
7137 case 1: /* D2 vline */
7138 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7139 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7140 DRM_DEBUG("IH: D2 vline\n");
7141 }
7142 break;
7143 default:
7144 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7145 break;
7146 }
7147 break;
7148 case 3: /* D3 vblank/vline */
7149 switch (src_data) {
7150 case 0: /* D3 vblank */
7151 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7152 if (rdev->irq.crtc_vblank_int[2]) {
7153 drm_handle_vblank(rdev->ddev, 2);
7154 rdev->pm.vblank_sync = true;
7155 wake_up(&rdev->irq.vblank_queue);
7156 }
7157 if (atomic_read(&rdev->irq.pflip[2]))
7158 radeon_crtc_handle_flip(rdev, 2);
7159 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7160 DRM_DEBUG("IH: D3 vblank\n");
7161 }
7162 break;
7163 case 1: /* D3 vline */
7164 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7165 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7166 DRM_DEBUG("IH: D3 vline\n");
7167 }
7168 break;
7169 default:
7170 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7171 break;
7172 }
7173 break;
7174 case 4: /* D4 vblank/vline */
7175 switch (src_data) {
7176 case 0: /* D4 vblank */
7177 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7178 if (rdev->irq.crtc_vblank_int[3]) {
7179 drm_handle_vblank(rdev->ddev, 3);
7180 rdev->pm.vblank_sync = true;
7181 wake_up(&rdev->irq.vblank_queue);
7182 }
7183 if (atomic_read(&rdev->irq.pflip[3]))
7184 radeon_crtc_handle_flip(rdev, 3);
7185 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7186 DRM_DEBUG("IH: D4 vblank\n");
7187 }
7188 break;
7189 case 1: /* D4 vline */
7190 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7191 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7192 DRM_DEBUG("IH: D4 vline\n");
7193 }
7194 break;
7195 default:
7196 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7197 break;
7198 }
7199 break;
7200 case 5: /* D5 vblank/vline */
7201 switch (src_data) {
7202 case 0: /* D5 vblank */
7203 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7204 if (rdev->irq.crtc_vblank_int[4]) {
7205 drm_handle_vblank(rdev->ddev, 4);
7206 rdev->pm.vblank_sync = true;
7207 wake_up(&rdev->irq.vblank_queue);
7208 }
7209 if (atomic_read(&rdev->irq.pflip[4]))
7210 radeon_crtc_handle_flip(rdev, 4);
7211 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7212 DRM_DEBUG("IH: D5 vblank\n");
7213 }
7214 break;
7215 case 1: /* D5 vline */
7216 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7217 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7218 DRM_DEBUG("IH: D5 vline\n");
7219 }
7220 break;
7221 default:
7222 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7223 break;
7224 }
7225 break;
7226 case 6: /* D6 vblank/vline */
7227 switch (src_data) {
7228 case 0: /* D6 vblank */
7229 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7230 if (rdev->irq.crtc_vblank_int[5]) {
7231 drm_handle_vblank(rdev->ddev, 5);
7232 rdev->pm.vblank_sync = true;
7233 wake_up(&rdev->irq.vblank_queue);
7234 }
7235 if (atomic_read(&rdev->irq.pflip[5]))
7236 radeon_crtc_handle_flip(rdev, 5);
7237 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7238 DRM_DEBUG("IH: D6 vblank\n");
7239 }
7240 break;
7241 case 1: /* D6 vline */
7242 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7243 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7244 DRM_DEBUG("IH: D6 vline\n");
7245 }
7246 break;
7247 default:
7248 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7249 break;
7250 }
7251 break;
7252 case 42: /* HPD hotplug */
7253 switch (src_data) {
7254 case 0:
7255 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7256 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7257 queue_hotplug = true;
7258 DRM_DEBUG("IH: HPD1\n");
7259 }
7260 break;
7261 case 1:
7262 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7263 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7264 queue_hotplug = true;
7265 DRM_DEBUG("IH: HPD2\n");
7266 }
7267 break;
7268 case 2:
7269 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7270 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7271 queue_hotplug = true;
7272 DRM_DEBUG("IH: HPD3\n");
7273 }
7274 break;
7275 case 3:
7276 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7277 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7278 queue_hotplug = true;
7279 DRM_DEBUG("IH: HPD4\n");
7280 }
7281 break;
7282 case 4:
7283 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7284 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7285 queue_hotplug = true;
7286 DRM_DEBUG("IH: HPD5\n");
7287 }
7288 break;
7289 case 5:
7290 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7291 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7292 queue_hotplug = true;
7293 DRM_DEBUG("IH: HPD6\n");
7294 }
7295 break;
7296 default:
7297 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7298 break;
7299 }
7300 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04007301 case 146:
7302 case 147:
Alex Deucher3ec7d112013-06-14 10:42:22 -04007303 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7304 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7305 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
Alex Deucher9d97c992012-09-06 14:24:48 -04007306 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7307 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04007308 addr);
Alex Deucher9d97c992012-09-06 14:24:48 -04007309 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04007310 status);
7311 cik_vm_decode_fault(rdev, status, addr, mc_client);
Alex Deucher9d97c992012-09-06 14:24:48 -04007312 /* reset addr and status */
7313 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7314 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05007315 case 176: /* GFX RB CP_INT */
7316 case 177: /* GFX IB CP_INT */
7317 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7318 break;
7319 case 181: /* CP EOP event */
7320 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04007321 /* XXX check the bitfield order! */
7322 me_id = (ring_id & 0x60) >> 5;
7323 pipe_id = (ring_id & 0x18) >> 3;
7324 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05007325 switch (me_id) {
7326 case 0:
7327 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7328 break;
7329 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05007330 case 2:
7331 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7332 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7333 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7334 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05007335 break;
7336 }
7337 break;
7338 case 184: /* CP Privileged reg access */
7339 DRM_ERROR("Illegal register access in command stream\n");
7340 /* XXX check the bitfield order! */
7341 me_id = (ring_id & 0x60) >> 5;
7342 pipe_id = (ring_id & 0x18) >> 3;
7343 queue_id = (ring_id & 0x7) >> 0;
7344 switch (me_id) {
7345 case 0:
7346 /* This results in a full GPU reset, but all we need to do is soft
7347 * reset the CP for gfx
7348 */
7349 queue_reset = true;
7350 break;
7351 case 1:
7352 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007353 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007354 break;
7355 case 2:
7356 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007357 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007358 break;
7359 }
7360 break;
7361 case 185: /* CP Privileged inst */
7362 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04007363 /* XXX check the bitfield order! */
7364 me_id = (ring_id & 0x60) >> 5;
7365 pipe_id = (ring_id & 0x18) >> 3;
7366 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05007367 switch (me_id) {
7368 case 0:
7369 /* This results in a full GPU reset, but all we need to do is soft
7370 * reset the CP for gfx
7371 */
7372 queue_reset = true;
7373 break;
7374 case 1:
7375 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007376 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007377 break;
7378 case 2:
7379 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04007380 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05007381 break;
7382 }
7383 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04007384 case 224: /* SDMA trap event */
7385 /* XXX check the bitfield order! */
7386 me_id = (ring_id & 0x3) >> 0;
7387 queue_id = (ring_id & 0xc) >> 2;
7388 DRM_DEBUG("IH: SDMA trap\n");
7389 switch (me_id) {
7390 case 0:
7391 switch (queue_id) {
7392 case 0:
7393 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7394 break;
7395 case 1:
7396 /* XXX compute */
7397 break;
7398 case 2:
7399 /* XXX compute */
7400 break;
7401 }
7402 break;
7403 case 1:
7404 switch (queue_id) {
7405 case 0:
7406 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7407 break;
7408 case 1:
7409 /* XXX compute */
7410 break;
7411 case 2:
7412 /* XXX compute */
7413 break;
7414 }
7415 break;
7416 }
7417 break;
Alex Deucher41a524a2013-08-14 01:01:40 -04007418 case 230: /* thermal low to high */
7419 DRM_DEBUG("IH: thermal low to high\n");
7420 rdev->pm.dpm.thermal.high_to_low = false;
7421 queue_thermal = true;
7422 break;
7423 case 231: /* thermal high to low */
7424 DRM_DEBUG("IH: thermal high to low\n");
7425 rdev->pm.dpm.thermal.high_to_low = true;
7426 queue_thermal = true;
7427 break;
7428 case 233: /* GUI IDLE */
7429 DRM_DEBUG("IH: GUI idle\n");
7430 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04007431 case 241: /* SDMA Privileged inst */
7432 case 247: /* SDMA Privileged inst */
7433 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7434 /* XXX check the bitfield order! */
7435 me_id = (ring_id & 0x3) >> 0;
7436 queue_id = (ring_id & 0xc) >> 2;
7437 switch (me_id) {
7438 case 0:
7439 switch (queue_id) {
7440 case 0:
7441 queue_reset = true;
7442 break;
7443 case 1:
7444 /* XXX compute */
7445 queue_reset = true;
7446 break;
7447 case 2:
7448 /* XXX compute */
7449 queue_reset = true;
7450 break;
7451 }
7452 break;
7453 case 1:
7454 switch (queue_id) {
7455 case 0:
7456 queue_reset = true;
7457 break;
7458 case 1:
7459 /* XXX compute */
7460 queue_reset = true;
7461 break;
7462 case 2:
7463 /* XXX compute */
7464 queue_reset = true;
7465 break;
7466 }
7467 break;
7468 }
7469 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05007470 default:
7471 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7472 break;
7473 }
7474
7475 /* wptr/rptr are in bytes! */
7476 rptr += 16;
7477 rptr &= rdev->ih.ptr_mask;
7478 }
7479 if (queue_hotplug)
7480 schedule_work(&rdev->hotplug_work);
7481 if (queue_reset)
7482 schedule_work(&rdev->reset_work);
Alex Deucher41a524a2013-08-14 01:01:40 -04007483 if (queue_thermal)
7484 schedule_work(&rdev->pm.dpm.thermal.work);
Alex Deuchera59781b2012-11-09 10:45:57 -05007485 rdev->ih.rptr = rptr;
7486 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7487 atomic_set(&rdev->ih.lock, 0);
7488
7489 /* make sure wptr hasn't changed while processing */
7490 wptr = cik_get_ih_wptr(rdev);
7491 if (wptr != rptr)
7492 goto restart_ih;
7493
7494 return IRQ_HANDLED;
7495}
Alex Deucher7bf94a22012-08-17 11:48:29 -04007496
7497/*
7498 * startup/shutdown callbacks
7499 */
7500/**
7501 * cik_startup - program the asic to a functional state
7502 *
7503 * @rdev: radeon_device pointer
7504 *
7505 * Programs the asic to a functional state (CIK).
7506 * Called by cik_init() and cik_resume().
7507 * Returns 0 for success, error for failure.
7508 */
7509static int cik_startup(struct radeon_device *rdev)
7510{
7511 struct radeon_ring *ring;
7512 int r;
7513
Alex Deucher8a7cd272013-08-06 11:29:39 -04007514 /* enable pcie gen2/3 link */
7515 cik_pcie_gen3_enable(rdev);
Alex Deucher7235711a42013-04-04 13:58:09 -04007516 /* enable aspm */
7517 cik_program_aspm(rdev);
Alex Deucher8a7cd272013-08-06 11:29:39 -04007518
Alex Deucher6fab3feb2013-08-04 12:13:17 -04007519 cik_mc_program(rdev);
7520
Alex Deucher7bf94a22012-08-17 11:48:29 -04007521 if (rdev->flags & RADEON_IS_IGP) {
7522 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7523 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7524 r = cik_init_microcode(rdev);
7525 if (r) {
7526 DRM_ERROR("Failed to load firmware!\n");
7527 return r;
7528 }
7529 }
7530 } else {
7531 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7532 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7533 !rdev->mc_fw) {
7534 r = cik_init_microcode(rdev);
7535 if (r) {
7536 DRM_ERROR("Failed to load firmware!\n");
7537 return r;
7538 }
7539 }
7540
7541 r = ci_mc_load_microcode(rdev);
7542 if (r) {
7543 DRM_ERROR("Failed to load MC firmware!\n");
7544 return r;
7545 }
7546 }
7547
7548 r = r600_vram_scratch_init(rdev);
7549 if (r)
7550 return r;
7551
Alex Deucher7bf94a22012-08-17 11:48:29 -04007552 r = cik_pcie_gart_enable(rdev);
7553 if (r)
7554 return r;
7555 cik_gpu_init(rdev);
7556
7557 /* allocate rlc buffers */
Alex Deucher22c775c2013-07-23 09:41:05 -04007558 if (rdev->flags & RADEON_IS_IGP) {
7559 if (rdev->family == CHIP_KAVERI) {
7560 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7561 rdev->rlc.reg_list_size =
7562 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7563 } else {
7564 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7565 rdev->rlc.reg_list_size =
7566 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7567 }
7568 }
7569 rdev->rlc.cs_data = ci_cs_data;
7570 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
Alex Deucher1fd11772013-04-17 17:53:50 -04007571 r = sumo_rlc_init(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007572 if (r) {
7573 DRM_ERROR("Failed to init rlc BOs!\n");
7574 return r;
7575 }
7576
7577 /* allocate wb buffer */
7578 r = radeon_wb_init(rdev);
7579 if (r)
7580 return r;
7581
Alex Deucher963e81f2013-06-26 17:37:11 -04007582 /* allocate mec buffers */
7583 r = cik_mec_init(rdev);
7584 if (r) {
7585 DRM_ERROR("Failed to init MEC BOs!\n");
7586 return r;
7587 }
7588
Alex Deucher7bf94a22012-08-17 11:48:29 -04007589 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7590 if (r) {
7591 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7592 return r;
7593 }
7594
Alex Deucher963e81f2013-06-26 17:37:11 -04007595 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7596 if (r) {
7597 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7598 return r;
7599 }
7600
7601 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7602 if (r) {
7603 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7604 return r;
7605 }
7606
Alex Deucher7bf94a22012-08-17 11:48:29 -04007607 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7608 if (r) {
7609 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7610 return r;
7611 }
7612
7613 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7614 if (r) {
7615 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7616 return r;
7617 }
7618
Alex Deucher5e884f62013-08-06 11:39:38 -04007619 r = radeon_uvd_resume(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007620 if (!r) {
Alex Deucher5e884f62013-08-06 11:39:38 -04007621 cik_uvd_resume(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007622 r = radeon_fence_driver_start_ring(rdev,
7623 R600_RING_TYPE_UVD_INDEX);
7624 if (r)
7625 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7626 }
7627 if (r)
7628 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7629
Alex Deucher7bf94a22012-08-17 11:48:29 -04007630 /* Enable IRQ */
7631 if (!rdev->irq.installed) {
7632 r = radeon_irq_kms_init(rdev);
7633 if (r)
7634 return r;
7635 }
7636
7637 r = cik_irq_init(rdev);
7638 if (r) {
7639 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7640 radeon_irq_kms_fini(rdev);
7641 return r;
7642 }
7643 cik_irq_set(rdev);
7644
7645 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7646 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7647 CP_RB0_RPTR, CP_RB0_WPTR,
Christian König2e1e6da2013-08-13 11:56:52 +02007648 RADEON_CP_PACKET2);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007649 if (r)
7650 return r;
7651
Alex Deucher963e81f2013-06-26 17:37:11 -04007652 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04007653 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04007654 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7655 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7656 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Christian König2e1e6da2013-08-13 11:56:52 +02007657 PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04007658 if (r)
7659 return r;
7660 ring->me = 1; /* first MEC */
7661 ring->pipe = 0; /* first pipe */
7662 ring->queue = 0; /* first queue */
7663 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7664
Alex Deucher2615b532013-06-03 11:21:58 -04007665 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04007666 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7667 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7668 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Christian König2e1e6da2013-08-13 11:56:52 +02007669 PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04007670 if (r)
7671 return r;
7672 /* dGPUs only have 1 MEC */
7673 ring->me = 1; /* first MEC */
7674 ring->pipe = 0; /* first pipe */
7675 ring->queue = 1; /* second queue */
7676 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7677
Alex Deucher7bf94a22012-08-17 11:48:29 -04007678 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7679 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7680 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7681 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
Christian König2e1e6da2013-08-13 11:56:52 +02007682 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
Alex Deucher7bf94a22012-08-17 11:48:29 -04007683 if (r)
7684 return r;
7685
7686 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7687 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7688 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7689 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
Christian König2e1e6da2013-08-13 11:56:52 +02007690 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
Alex Deucher7bf94a22012-08-17 11:48:29 -04007691 if (r)
7692 return r;
7693
7694 r = cik_cp_resume(rdev);
7695 if (r)
7696 return r;
7697
7698 r = cik_sdma_resume(rdev);
7699 if (r)
7700 return r;
7701
Christian König87167bb2013-04-09 13:39:21 -04007702 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7703 if (ring->ring_size) {
Christian König02c9f7f2013-08-13 11:56:51 +02007704 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
Christian König87167bb2013-04-09 13:39:21 -04007705 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
Christian König2e1e6da2013-08-13 11:56:52 +02007706 RADEON_CP_PACKET2);
Christian König87167bb2013-04-09 13:39:21 -04007707 if (!r)
Alex Deucher5e884f62013-08-06 11:39:38 -04007708 r = r600_uvd_init(rdev, true);
Christian König87167bb2013-04-09 13:39:21 -04007709 if (r)
7710 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7711 }
7712
Alex Deucher7bf94a22012-08-17 11:48:29 -04007713 r = radeon_ib_pool_init(rdev);
7714 if (r) {
7715 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7716 return r;
7717 }
7718
7719 r = radeon_vm_manager_init(rdev);
7720 if (r) {
7721 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7722 return r;
7723 }
7724
7725 return 0;
7726}
7727
7728/**
7729 * cik_resume - resume the asic to a functional state
7730 *
7731 * @rdev: radeon_device pointer
7732 *
7733 * Programs the asic to a functional state (CIK).
7734 * Called at resume.
7735 * Returns 0 for success, error for failure.
7736 */
7737int cik_resume(struct radeon_device *rdev)
7738{
7739 int r;
7740
7741 /* post card */
7742 atom_asic_init(rdev->mode_info.atom_context);
7743
Alex Deucher0aafd312013-04-09 14:43:30 -04007744 /* init golden registers */
7745 cik_init_golden_registers(rdev);
7746
Alex Deucher7bf94a22012-08-17 11:48:29 -04007747 rdev->accel_working = true;
7748 r = cik_startup(rdev);
7749 if (r) {
7750 DRM_ERROR("cik startup failed on resume\n");
7751 rdev->accel_working = false;
7752 return r;
7753 }
7754
7755 return r;
7756
7757}
7758
7759/**
7760 * cik_suspend - suspend the asic
7761 *
7762 * @rdev: radeon_device pointer
7763 *
7764 * Bring the chip into a state suitable for suspend (CIK).
7765 * Called at suspend.
7766 * Returns 0 for success.
7767 */
7768int cik_suspend(struct radeon_device *rdev)
7769{
7770 radeon_vm_manager_fini(rdev);
7771 cik_cp_enable(rdev, false);
7772 cik_sdma_enable(rdev, false);
Christian König2858c002013-08-01 17:34:07 +02007773 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007774 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007775 cik_irq_suspend(rdev);
7776 radeon_wb_disable(rdev);
7777 cik_pcie_gart_disable(rdev);
7778 return 0;
7779}
7780
7781 /* Plan is to move initialization into this function and use
7782 * helper functions so that radeon_device_init does pretty much
7783 * nothing more than call asic specific functions. This
7784 * should also allow us to remove a bunch of callback functions
7785 * like vram_info.
7786 */
7787/**
7788 * cik_init - asic specific driver and hw init
7789 *
7790 * @rdev: radeon_device pointer
7791 *
7792 * Setup asic specific driver variables and program the hw
7793 * to a functional state (CIK).
7794 * Called at driver startup.
7795 * Returns 0 for success, errors for failure.
7796 */
7797int cik_init(struct radeon_device *rdev)
7798{
7799 struct radeon_ring *ring;
7800 int r;
7801
7802 /* Read BIOS */
7803 if (!radeon_get_bios(rdev)) {
7804 if (ASIC_IS_AVIVO(rdev))
7805 return -EINVAL;
7806 }
7807 /* Must be an ATOMBIOS */
7808 if (!rdev->is_atom_bios) {
7809 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7810 return -EINVAL;
7811 }
7812 r = radeon_atombios_init(rdev);
7813 if (r)
7814 return r;
7815
7816 /* Post card if necessary */
7817 if (!radeon_card_posted(rdev)) {
7818 if (!rdev->bios) {
7819 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7820 return -EINVAL;
7821 }
7822 DRM_INFO("GPU not posted. posting now...\n");
7823 atom_asic_init(rdev->mode_info.atom_context);
7824 }
Alex Deucher0aafd312013-04-09 14:43:30 -04007825 /* init golden registers */
7826 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007827 /* Initialize scratch registers */
7828 cik_scratch_init(rdev);
7829 /* Initialize surface registers */
7830 radeon_surface_init(rdev);
7831 /* Initialize clocks */
7832 radeon_get_clock_info(rdev->ddev);
7833
7834 /* Fence driver */
7835 r = radeon_fence_driver_init(rdev);
7836 if (r)
7837 return r;
7838
7839 /* initialize memory controller */
7840 r = cik_mc_init(rdev);
7841 if (r)
7842 return r;
7843 /* Memory manager */
7844 r = radeon_bo_init(rdev);
7845 if (r)
7846 return r;
7847
7848 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7849 ring->ring_obj = NULL;
7850 r600_ring_init(rdev, ring, 1024 * 1024);
7851
Alex Deucher963e81f2013-06-26 17:37:11 -04007852 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7853 ring->ring_obj = NULL;
7854 r600_ring_init(rdev, ring, 1024 * 1024);
7855 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7856 if (r)
7857 return r;
7858
7859 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7860 ring->ring_obj = NULL;
7861 r600_ring_init(rdev, ring, 1024 * 1024);
7862 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7863 if (r)
7864 return r;
7865
Alex Deucher7bf94a22012-08-17 11:48:29 -04007866 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7867 ring->ring_obj = NULL;
7868 r600_ring_init(rdev, ring, 256 * 1024);
7869
7870 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7871 ring->ring_obj = NULL;
7872 r600_ring_init(rdev, ring, 256 * 1024);
7873
Christian König87167bb2013-04-09 13:39:21 -04007874 r = radeon_uvd_init(rdev);
7875 if (!r) {
7876 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7877 ring->ring_obj = NULL;
7878 r600_ring_init(rdev, ring, 4096);
7879 }
7880
Alex Deucher7bf94a22012-08-17 11:48:29 -04007881 rdev->ih.ring_obj = NULL;
7882 r600_ih_ring_init(rdev, 64 * 1024);
7883
7884 r = r600_pcie_gart_init(rdev);
7885 if (r)
7886 return r;
7887
7888 rdev->accel_working = true;
7889 r = cik_startup(rdev);
7890 if (r) {
7891 dev_err(rdev->dev, "disabling GPU acceleration\n");
7892 cik_cp_fini(rdev);
7893 cik_sdma_fini(rdev);
7894 cik_irq_fini(rdev);
Alex Deucher1fd11772013-04-17 17:53:50 -04007895 sumo_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04007896 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007897 radeon_wb_fini(rdev);
7898 radeon_ib_pool_fini(rdev);
7899 radeon_vm_manager_fini(rdev);
7900 radeon_irq_kms_fini(rdev);
7901 cik_pcie_gart_fini(rdev);
7902 rdev->accel_working = false;
7903 }
7904
7905 /* Don't start up if the MC ucode is missing.
7906 * The default clocks and voltages before the MC ucode
7907 * is loaded are not sufficient for advanced operations.
7908 */
7909 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7910 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7911 return -EINVAL;
7912 }
7913
7914 return 0;
7915}
7916
7917/**
7918 * cik_fini - asic specific driver and hw fini
7919 *
7920 * @rdev: radeon_device pointer
7921 *
7922 * Tear down the asic specific driver variables and program the hw
7923 * to an idle state (CIK).
7924 * Called at driver unload.
7925 */
7926void cik_fini(struct radeon_device *rdev)
7927{
7928 cik_cp_fini(rdev);
7929 cik_sdma_fini(rdev);
7930 cik_irq_fini(rdev);
Alex Deucher1fd11772013-04-17 17:53:50 -04007931 sumo_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04007932 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007933 radeon_wb_fini(rdev);
7934 radeon_vm_manager_fini(rdev);
7935 radeon_ib_pool_fini(rdev);
7936 radeon_irq_kms_fini(rdev);
Christian König2858c002013-08-01 17:34:07 +02007937 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04007938 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04007939 cik_pcie_gart_fini(rdev);
7940 r600_vram_scratch_fini(rdev);
7941 radeon_gem_fini(rdev);
7942 radeon_fence_driver_fini(rdev);
7943 radeon_bo_fini(rdev);
7944 radeon_atombios_fini(rdev);
7945 kfree(rdev->bios);
7946 rdev->bios = NULL;
7947}
Alex Deuchercd84a272012-07-20 17:13:13 -04007948
7949/* display watermark setup */
7950/**
7951 * dce8_line_buffer_adjust - Set up the line buffer
7952 *
7953 * @rdev: radeon_device pointer
7954 * @radeon_crtc: the selected display controller
7955 * @mode: the current display mode on the selected display
7956 * controller
7957 *
7958 * Set up the line buffer allocation for
7959 * the selected display controller (CIK).
7960 * Returns the line buffer size in pixels.
7961 */
7962static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7963 struct radeon_crtc *radeon_crtc,
7964 struct drm_display_mode *mode)
7965{
7966 u32 tmp;
7967
7968 /*
7969 * Line Buffer Setup
7970 * There are 6 line buffers, one for each display controller.
7971 * There are 3 partitions per LB. Select the number of partitions
7972 * to enable based on the display width. For display widths larger
7973 * than 4096, you need to use 2 display controllers and combine
7974 * them using the stereo blender.
7975 */
7976 if (radeon_crtc->base.enabled && mode) {
7977 if (mode->crtc_hdisplay < 1920)
7978 tmp = 1;
7979 else if (mode->crtc_hdisplay < 2560)
7980 tmp = 2;
7981 else if (mode->crtc_hdisplay < 4096)
7982 tmp = 0;
7983 else {
7984 DRM_DEBUG_KMS("Mode too big for LB!\n");
7985 tmp = 0;
7986 }
7987 } else
7988 tmp = 1;
7989
7990 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7991 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7992
7993 if (radeon_crtc->base.enabled && mode) {
7994 switch (tmp) {
7995 case 0:
7996 default:
7997 return 4096 * 2;
7998 case 1:
7999 return 1920 * 2;
8000 case 2:
8001 return 2560 * 2;
8002 }
8003 }
8004
8005 /* controller not enabled, so no lb used */
8006 return 0;
8007}
8008
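/* Worked example (illustrative, restating the mapping above): a
 * 1920-wide mode is not < 1920 but is < 2560, so config 2 is chosen
 * and the controller gets 2560 * 2 = 5120 pixels of line buffer;
 * modes 4096 or wider fall back to config 0 (4096 * 2) and need two
 * controllers plus the stereo blender.
 */
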
8009/**
8010 * cik_get_number_of_dram_channels - get the number of dram channels
8011 *
8012 * @rdev: radeon_device pointer
8013 *
8014 * Look up the number of video ram channels (CIK).
8015 * Used for display watermark bandwidth calculations
8016 * Returns the number of dram channels
8017 */
8018static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8019{
8020 u32 tmp = RREG32(MC_SHARED_CHMAP);
8021
8022 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8023 case 0:
8024 default:
8025 return 1;
8026 case 1:
8027 return 2;
8028 case 2:
8029 return 4;
8030 case 3:
8031 return 8;
8032 case 4:
8033 return 3;
8034 case 5:
8035 return 6;
8036 case 6:
8037 return 10;
8038 case 7:
8039 return 12;
8040 case 8:
8041 return 16;
8042 }
8043}
8044
8045struct dce8_wm_params {
8046 u32 dram_channels; /* number of dram channels */
8047 u32 yclk; /* bandwidth per dram data pin in kHz */
8048 u32 sclk; /* engine clock in kHz */
8049 u32 disp_clk; /* display clock in kHz */
8050 u32 src_width; /* viewport width */
8051 u32 active_time; /* active display time in ns */
8052 u32 blank_time; /* blank time in ns */
8053 bool interlaced; /* mode is interlaced */
8054 fixed20_12 vsc; /* vertical scale ratio */
8055 u32 num_heads; /* number of active crtcs */
8056 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8057 u32 lb_size; /* line buffer allocated to pipe */
8058 u32 vtaps; /* vertical scaler taps */
8059};
8060
8061/**
8062 * dce8_dram_bandwidth - get the dram bandwidth
8063 *
8064 * @wm: watermark calculation data
8065 *
8066 * Calculate the raw dram bandwidth (CIK).
8067 * Used for display watermark bandwidth calculations
8068 * Returns the dram bandwidth in MBytes/s
8069 */
8070static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8071{
8072 /* Calculate raw DRAM Bandwidth */
8073 fixed20_12 dram_efficiency; /* 0.7 */
8074 fixed20_12 yclk, dram_channels, bandwidth;
8075 fixed20_12 a;
8076
8077 a.full = dfixed_const(1000);
8078 yclk.full = dfixed_const(wm->yclk);
8079 yclk.full = dfixed_div(yclk, a);
8080 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8081 a.full = dfixed_const(10);
8082 dram_efficiency.full = dfixed_const(7);
8083 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8084 bandwidth.full = dfixed_mul(dram_channels, yclk);
8085 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8086
8087 return dfixed_trunc(bandwidth);
8088}
8089
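/* Closed form of the fixed-point math above (a sketch; the numbers are
 * illustrative only):
 *
 *   dram_bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7  MB/s
 *
 * e.g. yclk = 1000000 kHz on an 8-channel (256-bit) board:
 *   1000 * 32 * 0.7 = 22400 MB/s.
 * dce8_data_return_bandwidth() and dce8_dmif_request_bandwidth() below
 * follow the same pattern with sclk/disp_clk, a 32-byte request size
 * and 0.8 efficiency.
 */
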
8090/**
8091 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8092 *
8093 * @wm: watermark calculation data
8094 *
8095 * Calculate the dram bandwidth used for display (CIK).
8096 * Used for display watermark bandwidth calculations
8097 * Returns the dram bandwidth for display in MBytes/s
8098 */
8099static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8100{
8101 /* Calculate DRAM Bandwidth and the part allocated to display. */
8102 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8103 fixed20_12 yclk, dram_channels, bandwidth;
8104 fixed20_12 a;
8105
8106 a.full = dfixed_const(1000);
8107 yclk.full = dfixed_const(wm->yclk);
8108 yclk.full = dfixed_div(yclk, a);
8109 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8110 a.full = dfixed_const(10);
8111 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8112 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8113 bandwidth.full = dfixed_mul(dram_channels, yclk);
8114 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8115
8116 return dfixed_trunc(bandwidth);
8117}
8118
8119/**
8120 * dce8_data_return_bandwidth - get the data return bandwidth
8121 *
8122 * @wm: watermark calculation data
8123 *
8124 * Calculate the data return bandwidth used for display (CIK).
8125 * Used for display watermark bandwidth calculations
8126 * Returns the data return bandwidth in MBytes/s
8127 */
8128static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8129{
8130 /* Calculate the display Data return Bandwidth */
8131 fixed20_12 return_efficiency; /* 0.8 */
8132 fixed20_12 sclk, bandwidth;
8133 fixed20_12 a;
8134
8135 a.full = dfixed_const(1000);
8136 sclk.full = dfixed_const(wm->sclk);
8137 sclk.full = dfixed_div(sclk, a);
8138 a.full = dfixed_const(10);
8139 return_efficiency.full = dfixed_const(8);
8140 return_efficiency.full = dfixed_div(return_efficiency, a);
8141 a.full = dfixed_const(32);
8142 bandwidth.full = dfixed_mul(a, sclk);
8143 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8144
8145 return dfixed_trunc(bandwidth);
8146}
8147
8148/**
8149 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8150 *
8151 * @wm: watermark calculation data
8152 *
8153 * Calculate the dmif bandwidth used for display (CIK).
8154 * Used for display watermark bandwidth calculations
8155 * Returns the dmif bandwidth in MBytes/s
8156 */
8157static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8158{
8159 /* Calculate the DMIF Request Bandwidth */
8160 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8161 fixed20_12 disp_clk, bandwidth;
8162 fixed20_12 a, b;
8163
8164 a.full = dfixed_const(1000);
8165 disp_clk.full = dfixed_const(wm->disp_clk);
8166 disp_clk.full = dfixed_div(disp_clk, a);
8167 a.full = dfixed_const(32);
8168 b.full = dfixed_mul(a, disp_clk);
8169
8170 a.full = dfixed_const(10);
8171 disp_clk_request_efficiency.full = dfixed_const(8);
8172 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8173
8174 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8175
8176 return dfixed_trunc(bandwidth);
8177}
8178
8179/**
8180 * dce8_available_bandwidth - get the min available bandwidth
8181 *
8182 * @wm: watermark calculation data
8183 *
8184 * Calculate the min available bandwidth used for display (CIK).
8185 * Used for display watermark bandwidth calculations
8186 * Returns the min available bandwidth in MBytes/s
8187 */
8188static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8189{
8190 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8191 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8192 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8193 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8194
8195 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8196}
8197
8198/**
8199 * dce8_average_bandwidth - get the average available bandwidth
8200 *
8201 * @wm: watermark calculation data
8202 *
8203 * Calculate the average available bandwidth used for display (CIK).
8204 * Used for display watermark bandwidth calculations
8205 * Returns the average available bandwidth in MBytes/s
8206 */
8207static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8208{
8209 /* Calculate the display mode Average Bandwidth.
8210 * DisplayMode should contain the source and destination dimensions,
8211 * timing, etc.
8212 */
8213 fixed20_12 bpp;
8214 fixed20_12 line_time;
8215 fixed20_12 src_width;
8216 fixed20_12 bandwidth;
8217 fixed20_12 a;
8218
8219 a.full = dfixed_const(1000);
8220 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8221 line_time.full = dfixed_div(line_time, a);
8222 bpp.full = dfixed_const(wm->bytes_per_pixel);
8223 src_width.full = dfixed_const(wm->src_width);
8224 bandwidth.full = dfixed_mul(src_width, bpp);
8225 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8226 bandwidth.full = dfixed_div(bandwidth, line_time);
8227
8228 return dfixed_trunc(bandwidth);
8229}
8230
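/* Equivalent closed form (sketch; example numbers are illustrative):
 * with line_time in ns,
 *
 *   average_bandwidth = src_width * bytes_per_pixel * vsc
 *                       / (line_time / 1000)  MB/s
 *
 * i.e. the bytes of one scaled source line spread over one line period.
 * e.g. 1920 px * 4 bytes, vsc = 1, line_time = 16000 ns:
 *   7680 / 16 = 480 MB/s.
 */
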
8231/**
8232 * dce8_latency_watermark - get the latency watermark
8233 *
8234 * @wm: watermark calculation data
8235 *
8236 * Calculate the latency watermark (CIK).
8237 * Used for display watermark bandwidth calculations
8238 * Returns the latency watermark in ns
8239 */
8240static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8241{
8242 /* First calculate the latency in ns */
8243 u32 mc_latency = 2000; /* 2000 ns. */
8244 u32 available_bandwidth = dce8_available_bandwidth(wm);
8245 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8246 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8247 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8248 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8249 (wm->num_heads * cursor_line_pair_return_time);
8250 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8251 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8252 u32 tmp, dmif_size = 12288;
8253 fixed20_12 a, b, c;
8254
8255 if (wm->num_heads == 0)
8256 return 0;
8257
8258 a.full = dfixed_const(2);
8259 b.full = dfixed_const(1);
8260 if ((wm->vsc.full > a.full) ||
8261 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8262 (wm->vtaps >= 5) ||
8263 ((wm->vsc.full >= a.full) && wm->interlaced))
8264 max_src_lines_per_dst_line = 4;
8265 else
8266 max_src_lines_per_dst_line = 2;
8267
8268 a.full = dfixed_const(available_bandwidth);
8269 b.full = dfixed_const(wm->num_heads);
8270 a.full = dfixed_div(a, b);
8271
8272 b.full = dfixed_const(mc_latency + 512);
8273 c.full = dfixed_const(wm->disp_clk);
8274 b.full = dfixed_div(b, c);
8275
8276 c.full = dfixed_const(dmif_size);
8277 b.full = dfixed_div(c, b);
8278
8279 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8280
8281 b.full = dfixed_const(1000);
8282 c.full = dfixed_const(wm->disp_clk);
8283 b.full = dfixed_div(c, b);
8284 c.full = dfixed_const(wm->bytes_per_pixel);
8285 b.full = dfixed_mul(b, c);
8286
8287 lb_fill_bw = min(tmp, dfixed_trunc(b));
8288
8289 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8290 b.full = dfixed_const(1000);
8291 c.full = dfixed_const(lb_fill_bw);
8292 b.full = dfixed_div(c, b);
8293 a.full = dfixed_div(a, b);
8294 line_fill_time = dfixed_trunc(a);
8295
8296 if (line_fill_time < wm->active_time)
8297 return latency;
8298 else
8299 return latency + (line_fill_time - wm->active_time);
8300
8301}
8302
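/* Shape of the result (sketch): the base latency is
 *
 *   latency = mc_latency + dc_latency + other_heads_data_return_time
 *           = 2000 + 40000000 / disp_clk
 *             + (num_heads + 1) * worst_chunk_return_time
 *             + num_heads * cursor_line_pair_return_time   (all in ns)
 *
 * and when one line buffer line cannot be refilled within the active
 * display time, the shortfall (line_fill_time - active_time) is added.
 */
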
8303/**
8304 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8305 * average and available dram bandwidth
8306 *
8307 * @wm: watermark calculation data
8308 *
8309 * Check if the display average bandwidth fits in the display
8310 * dram bandwidth (CIK).
8311 * Used for display watermark bandwidth calculations
8312 * Returns true if the display fits, false if not.
8313 */
8314static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8315{
8316 if (dce8_average_bandwidth(wm) <=
8317 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8318 return true;
8319 else
8320 return false;
8321}
8322
8323/**
8324 * dce8_average_bandwidth_vs_available_bandwidth - check
8325 * average and available bandwidth
8326 *
8327 * @wm: watermark calculation data
8328 *
8329 * Check if the display average bandwidth fits in the display
8330 * available bandwidth (CIK).
8331 * Used for display watermark bandwidth calculations
8332 * Returns true if the display fits, false if not.
8333 */
8334static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8335{
8336 if (dce8_average_bandwidth(wm) <=
8337 (dce8_available_bandwidth(wm) / wm->num_heads))
8338 return true;
8339 else
8340 return false;
8341}
8342
8343/**
8344 * dce8_check_latency_hiding - check latency hiding
8345 *
8346 * @wm: watermark calculation data
8347 *
8348 * Check latency hiding (CIK).
8349 * Used for display watermark bandwidth calculations
8350 * Returns true if the display fits, false if not.
8351 */
8352static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8353{
8354 u32 lb_partitions = wm->lb_size / wm->src_width;
8355 u32 line_time = wm->active_time + wm->blank_time;
8356 u32 latency_tolerant_lines;
8357 u32 latency_hiding;
8358 fixed20_12 a;
8359
8360 a.full = dfixed_const(1);
8361 if (wm->vsc.full > a.full)
8362 latency_tolerant_lines = 1;
8363 else {
8364 if (lb_partitions <= (wm->vtaps + 1))
8365 latency_tolerant_lines = 1;
8366 else
8367 latency_tolerant_lines = 2;
8368 }
8369
8370 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8371
8372 if (dce8_latency_watermark(wm) <= latency_hiding)
8373 return true;
8374 else
8375 return false;
8376}
8377
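/* Worked example (illustrative numbers): lb_size = 5120 and
 * src_width = 1920 give lb_partitions = 2; with vsc <= 1 and
 * vtaps = 1 that is <= vtaps + 1, so latency_tolerant_lines = 1 and
 *
 *   latency_hiding = 1 * line_time + blank_time
 *
 * The mode fits if dce8_latency_watermark() stays below that budget.
 */
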
8378/**
8379 * dce8_program_watermarks - program display watermarks
8380 *
8381 * @rdev: radeon_device pointer
8382 * @radeon_crtc: the selected display controller
8383 * @lb_size: line buffer size
8384 * @num_heads: number of display controllers in use
8385 *
8386 * Calculate and program the display watermarks for the
8387 * selected display controller (CIK).
8388 */
8389static void dce8_program_watermarks(struct radeon_device *rdev,
8390 struct radeon_crtc *radeon_crtc,
8391 u32 lb_size, u32 num_heads)
8392{
8393 struct drm_display_mode *mode = &radeon_crtc->base.mode;
Alex Deucher58ea2de2013-01-24 10:03:39 -05008394 struct dce8_wm_params wm_low, wm_high;
Alex Deuchercd84a272012-07-20 17:13:13 -04008395 u32 pixel_period;
8396 u32 line_time = 0;
8397 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8398 u32 tmp, wm_mask;
8399
8400 if (radeon_crtc->base.enabled && num_heads && mode) {
8401 pixel_period = 1000000 / (u32)mode->clock;
8402 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8403
Alex Deucher58ea2de2013-01-24 10:03:39 -05008404 /* watermark for high clocks */
8405 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8406 rdev->pm.dpm_enabled) {
8407 wm_high.yclk =
8408 radeon_dpm_get_mclk(rdev, false) * 10;
8409 wm_high.sclk =
8410 radeon_dpm_get_sclk(rdev, false) * 10;
8411 } else {
8412 wm_high.yclk = rdev->pm.current_mclk * 10;
8413 wm_high.sclk = rdev->pm.current_sclk * 10;
8414 }
8415
8416 wm_high.disp_clk = mode->clock;
8417 wm_high.src_width = mode->crtc_hdisplay;
8418 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8419 wm_high.blank_time = line_time - wm_high.active_time;
8420 wm_high.interlaced = false;
Alex Deuchercd84a272012-07-20 17:13:13 -04008421 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
Alex Deucher58ea2de2013-01-24 10:03:39 -05008422 wm_high.interlaced = true;
8423 wm_high.vsc = radeon_crtc->vsc;
8424 wm_high.vtaps = 1;
Alex Deuchercd84a272012-07-20 17:13:13 -04008425 if (radeon_crtc->rmx_type != RMX_OFF)
Alex Deucher58ea2de2013-01-24 10:03:39 -05008426 wm_high.vtaps = 2;
8427 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8428 wm_high.lb_size = lb_size;
8429 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8430 wm_high.num_heads = num_heads;
Alex Deuchercd84a272012-07-20 17:13:13 -04008431
8432 /* set for high clocks */
Alex Deucher58ea2de2013-01-24 10:03:39 -05008433 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
Alex Deuchercd84a272012-07-20 17:13:13 -04008434
8435 /* possibly force display priority to high */
8436 /* should really do this at mode validation time... */
Alex Deucher58ea2de2013-01-24 10:03:39 -05008437 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8438 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8439 !dce8_check_latency_hiding(&wm_high) ||
8440 (rdev->disp_priority == 2)) {
8441 DRM_DEBUG_KMS("force priority to high\n");
8442 }
8443
8444 /* watermark for low clocks */
8445 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8446 rdev->pm.dpm_enabled) {
8447 wm_low.yclk =
8448 radeon_dpm_get_mclk(rdev, true) * 10;
8449 wm_low.sclk =
8450 radeon_dpm_get_sclk(rdev, true) * 10;
8451 } else {
8452 wm_low.yclk = rdev->pm.current_mclk * 10;
8453 wm_low.sclk = rdev->pm.current_sclk * 10;
8454 }
8455
8456 wm_low.disp_clk = mode->clock;
8457 wm_low.src_width = mode->crtc_hdisplay;
8458 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8459 wm_low.blank_time = line_time - wm_low.active_time;
8460 wm_low.interlaced = false;
8461 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8462 wm_low.interlaced = true;
8463 wm_low.vsc = radeon_crtc->vsc;
8464 wm_low.vtaps = 1;
8465 if (radeon_crtc->rmx_type != RMX_OFF)
8466 wm_low.vtaps = 2;
8467 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8468 wm_low.lb_size = lb_size;
8469 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8470 wm_low.num_heads = num_heads;
8471
8472 /* set for low clocks */
8473 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8474
8475 /* possibly force display priority to high */
8476 /* should really do this at mode validation time... */
8477 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8478 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8479 !dce8_check_latency_hiding(&wm_low) ||
Alex Deuchercd84a272012-07-20 17:13:13 -04008480 (rdev->disp_priority == 2)) {
8481 DRM_DEBUG_KMS("force priority to high\n");
8482 }
8483 }
8484
8485 /* select wm A */
8486 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8487 tmp = wm_mask;
8488 tmp &= ~LATENCY_WATERMARK_MASK(3);
8489 tmp |= LATENCY_WATERMARK_MASK(1);
8490 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8491 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8492 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8493 LATENCY_HIGH_WATERMARK(line_time)));
8494 /* select wm B */
8495 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8496 tmp &= ~LATENCY_WATERMARK_MASK(3);
8497 tmp |= LATENCY_WATERMARK_MASK(2);
8498 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8499 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8500 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8501 LATENCY_HIGH_WATERMARK(line_time)));
8502 /* restore original selection */
8503 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
Alex Deucher58ea2de2013-01-24 10:03:39 -05008504
8505 /* save values for DPM */
8506 radeon_crtc->line_time = line_time;
8507 radeon_crtc->wm_high = latency_watermark_a;
8508 radeon_crtc->wm_low = latency_watermark_b;
Alex Deuchercd84a272012-07-20 17:13:13 -04008509}
8510
8511/**
8512 * dce8_bandwidth_update - program display watermarks
8513 *
8514 * @rdev: radeon_device pointer
8515 *
8516 * Calculate and program the display watermarks and line
8517 * buffer allocation (CIK).
8518 */
8519void dce8_bandwidth_update(struct radeon_device *rdev)
8520{
8521 struct drm_display_mode *mode = NULL;
8522 u32 num_heads = 0, lb_size;
8523 int i;
8524
8525 radeon_update_display_priority(rdev);
8526
8527 for (i = 0; i < rdev->num_crtc; i++) {
8528 if (rdev->mode_info.crtcs[i]->base.enabled)
8529 num_heads++;
8530 }
8531 for (i = 0; i < rdev->num_crtc; i++) {
8532 mode = &rdev->mode_info.crtcs[i]->base.mode;
8533 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8534 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8535 }
8536}
Alex Deucher44fa3462012-12-18 22:17:00 -05008537
8538/**
8539 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8540 *
8541 * @rdev: radeon_device pointer
8542 *
8543 * Fetches a GPU clock counter snapshot (CIK).
8544 * Returns the 64 bit clock counter snapshot.
8545 */
8546uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8547{
8548 uint64_t clock;
8549
8550 mutex_lock(&rdev->gpu_clock_mutex);
8551 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8552 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8553 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8554 mutex_unlock(&rdev->gpu_clock_mutex);
8555 return clock;
8556}
8557
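/* Hypothetical usage sketch (not from the driver): two snapshots
 * bracket a workload to measure elapsed GPU clocks; the mutex inside
 * the helper serializes the capture sequence.
 *
 *   uint64_t t0 = cik_get_gpu_clock_counter(rdev);
 *   ...submit work and wait for it...
 *   uint64_t cycles = cik_get_gpu_clock_counter(rdev) - t0;
 */
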
Christian König87167bb2013-04-09 13:39:21 -04008558static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8559 u32 cntl_reg, u32 status_reg)
8560{
8561 int r, i;
8562 struct atom_clock_dividers dividers;
8563 uint32_t tmp;
8564
8565 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8566 clock, false, &dividers);
8567 if (r)
8568 return r;
8569
8570 tmp = RREG32_SMC(cntl_reg);
8571 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8572 tmp |= dividers.post_divider;
8573 WREG32_SMC(cntl_reg, tmp);
8574
8575 for (i = 0; i < 100; i++) {
8576 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8577 break;
8578 mdelay(10);
8579 }
8580 if (i == 100)
8581 return -ETIMEDOUT;
8582
8583 return 0;
8584}
8585
8586int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8587{
8588 int r = 0;
8589
8590 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8591 if (r)
8592 return r;
8593
8594 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8595 return r;
8596}
8597
Alex Deucher77df5082013-08-09 10:02:40 -04008598void cik_uvd_resume(struct radeon_device *rdev)
Christian König87167bb2013-04-09 13:39:21 -04008599{
8600 uint64_t addr;
8601 uint32_t size;
Christian König87167bb2013-04-09 13:39:21 -04008602
8603 /* program the VCPU memory controller bits 0-27 */
8604 addr = rdev->uvd.gpu_addr >> 3;
Christian König4ad9c1c2013-08-05 14:10:55 +02008605 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
Christian König87167bb2013-04-09 13:39:21 -04008606 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
8607 WREG32(UVD_VCPU_CACHE_SIZE0, size);
8608
8609 addr += size;
8610 size = RADEON_UVD_STACK_SIZE >> 3;
8611 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
8612 WREG32(UVD_VCPU_CACHE_SIZE1, size);
8613
8614 addr += size;
8615 size = RADEON_UVD_HEAP_SIZE >> 3;
8616 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
8617 WREG32(UVD_VCPU_CACHE_SIZE2, size);
8618
8619 /* bits 28-31 */
8620 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
8621 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
8622
8623 /* bits 32-39 */
8624 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
8625 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
8626
Christian König87167bb2013-04-09 13:39:21 -04008627}
Alex Deucher8a7cd272013-08-06 11:29:39 -04008628
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

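			/* retry the equalization sequence up to 10 times:
			 * latch the link control settings, toggle quiesce and
			 * redo-EQ in PCIE_LC_CNTL4, then restore the latched
			 * settings on both the bridge and the GPU
			 */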
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

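	/* select the target link speed (LNKCTL2 bits 3:0) on the GPU */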
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}

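/**
 * cik_program_aspm - program PCIE ASPM (Active State Power Management)
 *
 * @rdev: radeon_device pointer
 *
 * Enables the L0s and L1 link power states and, where supported,
 * PLL powerdown in L1 and CLKREQ-based clocking.  Skipped on IGPs
 * and when booted with radeon.aspm=0 (CIK).
 */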
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

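		/* program the PIF PLL power states used in L1 and the
		 * dynamic lane power state
		 */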
		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

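	/* back off L0s if the link advertises the maximum N_FTS value
	 * and both directions report reversed lanes
	 */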
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}