/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64
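/* the ucode sizes above are in dwords; cik_init_microcode() below checks
 * the firmware files on disk against these sizes multiplied by 4 (bytes).
 */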

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

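/*
 * "Golden" register settings.  Each entry below is a {register offset,
 * mask, value} triple that radeon_program_register_sequence() applies
 * from cik_init_golden_registers().
 */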
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

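/* Note: KAVERI uses the "spectre" tables and KABINI the "kalindi" tables
 * above; those are the GPU code names for the respective APUs.
 */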
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

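/* The doorbell aperture is a small MMIO BAR; writing a ring's doorbell
 * tells the hardware that new entries have been added to that ring
 * (used by the compute MEC and SDMA queues on CIK).
 */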
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

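/* Typical usage: select the desired me/pipe/queue/vmid instance, program
 * the instanced registers, then switch back to the default instance with
 * cik_srbm_select(rdev, 0, 0, 0, 0).
 */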
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			/* unreachable here: this path is guarded by
			 * running == 0 above
			 */
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

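/* The radeon/*.bin images requested above are distributed with the
 * linux-firmware tree and are normally installed under /lib/firmware/.
 */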
Alex Deucher8cc1a532013-04-09 12:41:24 -0400888/*
889 * Core functions
890 */
891/**
892 * cik_tiling_mode_table_init - init the hw tiling table
893 *
894 * @rdev: radeon_device pointer
895 *
896 * Starting with SI, the tiling setup is done globally in a
897 * set of 32 tiling modes. Rather than selecting each set of
898 * parameters per surface as on older asics, we just select
899 * which index in the tiling table we want to use, and the
900 * surface uses those parameters (CIK).
901 */
902static void cik_tiling_mode_table_init(struct radeon_device *rdev)
903{
904 const u32 num_tile_mode_states = 32;
905 const u32 num_secondary_tile_mode_states = 16;
906 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
907 u32 num_pipe_configs;
908 u32 num_rbs = rdev->config.cik.max_backends_per_se *
909 rdev->config.cik.max_shader_engines;
910
911 switch (rdev->config.cik.mem_row_size_in_kb) {
912 case 1:
913 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
914 break;
915 case 2:
916 default:
917 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
918 break;
919 case 4:
920 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
921 break;
922 }
923
924 num_pipe_configs = rdev->config.cik.max_tile_pipes;
925 if (num_pipe_configs > 8)
926 num_pipe_configs = 8; /* ??? */
927
928 if (num_pipe_configs == 8) {
929 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
930 switch (reg_offset) {
931 case 0:
932 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
936 break;
937 case 1:
938 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
939 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
940 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
941 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
942 break;
943 case 2:
944 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
945 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
947 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
948 break;
949 case 3:
950 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
951 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
952 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
953 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
954 break;
955 case 4:
956 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
957 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
958 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
959 TILE_SPLIT(split_equal_to_row_size));
960 break;
961 case 5:
962 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
963 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
964 break;
965 case 6:
966 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
967 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
969 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
970 break;
971 case 7:
972 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
973 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
975 TILE_SPLIT(split_equal_to_row_size));
976 break;
977 case 8:
978 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
980 break;
981 case 9:
982 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
983 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
984 break;
985 case 10:
986 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
987 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
990 break;
991 case 11:
992 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
993 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
994 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
995 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
996 break;
997 case 12:
998 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
999 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1001 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1002 break;
1003 case 13:
1004 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1005 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1006 break;
1007 case 14:
1008 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1009 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1012 break;
1013 case 16:
1014 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1015 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1016 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1018 break;
1019 case 17:
1020 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1021 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1022 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1024 break;
1025 case 27:
1026 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1027 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1028 break;
1029 case 28:
1030 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1031 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1033 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1034 break;
1035 case 29:
1036 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1037 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1038 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1040 break;
1041 case 30:
1042 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1043 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1046 break;
1047 default:
1048 gb_tile_moden = 0;
1049 break;
1050 }
Alex Deucher39aee492013-04-10 13:41:25 -04001051 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
Alex Deucher8cc1a532013-04-09 12:41:24 -04001052 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1053 }
1054 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1055 switch (reg_offset) {
1056 case 0:
1057 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1058 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1059 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1060 NUM_BANKS(ADDR_SURF_16_BANK));
1061 break;
1062 case 1:
1063 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1066 NUM_BANKS(ADDR_SURF_16_BANK));
1067 break;
1068 case 2:
1069 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1070 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1071 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1072 NUM_BANKS(ADDR_SURF_16_BANK));
1073 break;
1074 case 3:
1075 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1076 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1077 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1078 NUM_BANKS(ADDR_SURF_16_BANK));
1079 break;
1080 case 4:
1081 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1084 NUM_BANKS(ADDR_SURF_8_BANK));
1085 break;
1086 case 5:
1087 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1088 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1089 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1090 NUM_BANKS(ADDR_SURF_4_BANK));
1091 break;
1092 case 6:
1093 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1096 NUM_BANKS(ADDR_SURF_2_BANK));
1097 break;
1098 case 8:
1099 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1100 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1101 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1102 NUM_BANKS(ADDR_SURF_16_BANK));
1103 break;
1104 case 9:
1105 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1106 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1107 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1108 NUM_BANKS(ADDR_SURF_16_BANK));
1109 break;
1110 case 10:
1111 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1112 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1113 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1114 NUM_BANKS(ADDR_SURF_16_BANK));
1115 break;
1116 case 11:
1117 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1120 NUM_BANKS(ADDR_SURF_16_BANK));
1121 break;
1122 case 12:
1123 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1126 NUM_BANKS(ADDR_SURF_8_BANK));
1127 break;
1128 case 13:
1129 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1130 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1131 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1132 NUM_BANKS(ADDR_SURF_4_BANK));
1133 break;
1134 case 14:
1135 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1138 NUM_BANKS(ADDR_SURF_2_BANK));
1139 break;
1140 default:
1141 gb_tile_moden = 0;
1142 break;
1143 }
1144 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1145 }
1146 } else if (num_pipe_configs == 4) {
1147 if (num_rbs == 4) {
1148 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1149 switch (reg_offset) {
1150 case 0:
1151 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1153 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1155 break;
1156 case 1:
1157 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1159 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1160 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1161 break;
1162 case 2:
1163 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1165 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1166 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1167 break;
1168 case 3:
1169 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1170 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1171 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1172 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1173 break;
1174 case 4:
1175 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1177 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1178 TILE_SPLIT(split_equal_to_row_size));
1179 break;
1180 case 5:
1181 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1182 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1183 break;
1184 case 6:
1185 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1186 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1187 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1188 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1189 break;
1190 case 7:
1191 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1192 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1193 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1194 TILE_SPLIT(split_equal_to_row_size));
1195 break;
1196 case 8:
1197 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1198 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1199 break;
1200 case 9:
1201 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1202 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1203 break;
1204 case 10:
1205 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1206 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1207 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1209 break;
1210 case 11:
1211 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1212 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1213 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1215 break;
1216 case 12:
1217 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1218 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1219 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1221 break;
1222 case 13:
1223 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1224 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1225 break;
1226 case 14:
1227 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1229 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1231 break;
1232 case 16:
1233 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1234 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1235 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1237 break;
1238 case 17:
1239 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1240 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1241 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1243 break;
1244 case 27:
1245 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1246 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1247 break;
1248 case 28:
1249 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1250 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1253 break;
1254 case 29:
1255 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1256 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1257 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1259 break;
1260 case 30:
1261 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1262 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1263 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1265 break;
1266 default:
1267 gb_tile_moden = 0;
1268 break;
1269 }
Alex Deucher39aee492013-04-10 13:41:25 -04001270 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
Alex Deucher8cc1a532013-04-09 12:41:24 -04001271 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1272 }
1273 } else if (num_rbs < 4) {
1274 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1275 switch (reg_offset) {
1276 case 0:
1277 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1279 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1281 break;
1282 case 1:
1283 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1285 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1286 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1287 break;
1288 case 2:
1289 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1291 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1293 break;
1294 case 3:
1295 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1297 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1298 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1299 break;
1300 case 4:
1301 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1303 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1304 TILE_SPLIT(split_equal_to_row_size));
1305 break;
1306 case 5:
1307 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1309 break;
1310 case 6:
1311 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1313 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1314 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1315 break;
1316 case 7:
1317 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1319 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320 TILE_SPLIT(split_equal_to_row_size));
1321 break;
1322 case 8:
1323 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1324 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1325 break;
1326 case 9:
1327 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1328 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1329 break;
1330 case 10:
1331 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1332 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1333 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1335 break;
1336 case 11:
1337 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1339 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1341 break;
1342 case 12:
1343 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1344 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1345 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1347 break;
1348 case 13:
1349 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1350 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1351 break;
1352 case 14:
1353 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1355 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1357 break;
1358 case 16:
1359 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1360 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1361 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1363 break;
1364 case 17:
1365 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1366 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1367 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1369 break;
1370 case 27:
1371 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1372 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1373 break;
1374 case 28:
1375 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1376 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1377 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1379 break;
1380 case 29:
1381 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1382 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1383 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1385 break;
1386 case 30:
1387 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1388 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1389 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391 break;
1392 default:
1393 gb_tile_moden = 0;
1394 break;
1395 }
Alex Deucher39aee492013-04-10 13:41:25 -04001396 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
Alex Deucher8cc1a532013-04-09 12:41:24 -04001397 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1398 }
1399 }
1400 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1401 switch (reg_offset) {
1402 case 0:
1403 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1406 NUM_BANKS(ADDR_SURF_16_BANK));
1407 break;
1408 case 1:
1409 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1412 NUM_BANKS(ADDR_SURF_16_BANK));
1413 break;
1414 case 2:
1415 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1418 NUM_BANKS(ADDR_SURF_16_BANK));
1419 break;
1420 case 3:
1421 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1424 NUM_BANKS(ADDR_SURF_16_BANK));
1425 break;
1426 case 4:
1427 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1430 NUM_BANKS(ADDR_SURF_16_BANK));
1431 break;
1432 case 5:
1433 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1436 NUM_BANKS(ADDR_SURF_8_BANK));
1437 break;
1438 case 6:
1439 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1440 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1441 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1442 NUM_BANKS(ADDR_SURF_4_BANK));
1443 break;
1444 case 8:
1445 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1448 NUM_BANKS(ADDR_SURF_16_BANK));
1449 break;
1450 case 9:
1451 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1454 NUM_BANKS(ADDR_SURF_16_BANK));
1455 break;
1456 case 10:
1457 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1460 NUM_BANKS(ADDR_SURF_16_BANK));
1461 break;
1462 case 11:
1463 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1466 NUM_BANKS(ADDR_SURF_16_BANK));
1467 break;
1468 case 12:
1469 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1472 NUM_BANKS(ADDR_SURF_16_BANK));
1473 break;
1474 case 13:
1475 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1478 NUM_BANKS(ADDR_SURF_8_BANK));
1479 break;
1480 case 14:
1481 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1484 NUM_BANKS(ADDR_SURF_4_BANK));
1485 break;
1486 default:
1487 gb_tile_moden = 0;
1488 break;
1489 }
1490 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1491 }
1492 } else if (num_pipe_configs == 2) {
1493 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1494 switch (reg_offset) {
1495 case 0:
1496 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1497 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1498 PIPE_CONFIG(ADDR_SURF_P2) |
1499 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1500 break;
1501 case 1:
1502 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1503 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1504 PIPE_CONFIG(ADDR_SURF_P2) |
1505 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1506 break;
1507 case 2:
1508 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1509 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1510 PIPE_CONFIG(ADDR_SURF_P2) |
1511 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1512 break;
1513 case 3:
1514 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1516 PIPE_CONFIG(ADDR_SURF_P2) |
1517 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1518 break;
1519 case 4:
1520 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1521 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1522 PIPE_CONFIG(ADDR_SURF_P2) |
1523 TILE_SPLIT(split_equal_to_row_size));
1524 break;
1525 case 5:
1526 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1528 break;
1529 case 6:
1530 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1531 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1532 PIPE_CONFIG(ADDR_SURF_P2) |
1533 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1534 break;
1535 case 7:
1536 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1537 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1538 PIPE_CONFIG(ADDR_SURF_P2) |
1539 TILE_SPLIT(split_equal_to_row_size));
1540 break;
1541 case 8:
1542 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1543 break;
1544 case 9:
1545 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1546 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1547 break;
1548 case 10:
1549 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1550 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1551 PIPE_CONFIG(ADDR_SURF_P2) |
1552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1553 break;
1554 case 11:
1555 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1556 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1557 PIPE_CONFIG(ADDR_SURF_P2) |
1558 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1559 break;
1560 case 12:
1561 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1562 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1563 PIPE_CONFIG(ADDR_SURF_P2) |
1564 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1565 break;
1566 case 13:
1567 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1569 break;
1570 case 14:
1571 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1572 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1573 PIPE_CONFIG(ADDR_SURF_P2) |
1574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1575 break;
1576 case 16:
1577 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1578 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1579 PIPE_CONFIG(ADDR_SURF_P2) |
1580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1581 break;
1582 case 17:
1583 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1584 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1585 PIPE_CONFIG(ADDR_SURF_P2) |
1586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1587 break;
1588 case 27:
1589 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1590 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1591 break;
1592 case 28:
1593 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1594 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1595 PIPE_CONFIG(ADDR_SURF_P2) |
1596 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1597 break;
1598 case 29:
1599 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1600 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1601 PIPE_CONFIG(ADDR_SURF_P2) |
1602 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1603 break;
1604 case 30:
1605 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1606 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1607 PIPE_CONFIG(ADDR_SURF_P2) |
1608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1609 break;
1610 default:
1611 gb_tile_moden = 0;
1612 break;
1613 }
Alex Deucher39aee492013-04-10 13:41:25 -04001614 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
Alex Deucher8cc1a532013-04-09 12:41:24 -04001615 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1616 }
1617 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1618 switch (reg_offset) {
1619 case 0:
1620 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1621 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1622 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1623 NUM_BANKS(ADDR_SURF_16_BANK));
1624 break;
1625 case 1:
1626 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1627 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1628 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1629 NUM_BANKS(ADDR_SURF_16_BANK));
1630 break;
1631 case 2:
1632 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1633 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1634 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1635 NUM_BANKS(ADDR_SURF_16_BANK));
1636 break;
1637 case 3:
1638 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1639 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1640 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1641 NUM_BANKS(ADDR_SURF_16_BANK));
1642 break;
1643 case 4:
1644 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1647 NUM_BANKS(ADDR_SURF_16_BANK));
1648 break;
1649 case 5:
1650 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1651 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1652 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1653 NUM_BANKS(ADDR_SURF_16_BANK));
1654 break;
1655 case 6:
1656 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1657 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1658 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1659 NUM_BANKS(ADDR_SURF_8_BANK));
1660 break;
1661 case 8:
1662 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1663 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1664 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1665 NUM_BANKS(ADDR_SURF_16_BANK));
1666 break;
1667 case 9:
1668 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1671 NUM_BANKS(ADDR_SURF_16_BANK));
1672 break;
1673 case 10:
1674 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1675 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1676 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1677 NUM_BANKS(ADDR_SURF_16_BANK));
1678 break;
1679 case 11:
1680 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1683 NUM_BANKS(ADDR_SURF_16_BANK));
1684 break;
1685 case 12:
1686 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1687 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1688 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1689 NUM_BANKS(ADDR_SURF_16_BANK));
1690 break;
1691 case 13:
1692 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1693 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1694 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1695 NUM_BANKS(ADDR_SURF_16_BANK));
1696 break;
1697 case 14:
1698 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1699 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1700 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1701 NUM_BANKS(ADDR_SURF_8_BANK));
1702 break;
1703 default:
1704 gb_tile_moden = 0;
1705 break;
1706 }
1707 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1708 }
1709 } else
1710 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1711}
1712
1713/**
1714 * cik_select_se_sh - select which SE, SH to address
1715 *
1716 * @rdev: radeon_device pointer
1717 * @se_num: shader engine to address
1718 * @sh_num: sh block to address
1719 *
1720 * Select which SE, SH combinations to address. Certain
1721 * registers are instanced per SE or SH. 0xffffffff means
1722 * broadcast to all SEs or SHs (CIK).
1723 */
1724static void cik_select_se_sh(struct radeon_device *rdev,
1725 u32 se_num, u32 sh_num)
1726{
1727 u32 data = INSTANCE_BROADCAST_WRITES;
1728
1729 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
Alex Deucherb0fe3d32013-04-18 16:25:47 -04001730 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
Alex Deucher8cc1a532013-04-09 12:41:24 -04001731 else if (se_num == 0xffffffff)
1732 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1733 else if (sh_num == 0xffffffff)
1734 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1735 else
1736 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1737 WREG32(GRBM_GFX_INDEX, data);
1738}
1739
1740/**
1741 * cik_create_bitmask - create a bitmask
1742 *
1743 * @bit_width: length of the mask
1744 *
1745 * Create a variable-length bitmask (CIK).
1746 * Returns the bitmask.
1747 */
1748static u32 cik_create_bitmask(u32 bit_width)
1749{
1750 u32 i, mask = 0;
1751
1752 for (i = 0; i < bit_width; i++) {
1753 mask <<= 1;
1754 mask |= 1;
1755 }
1756 return mask;
1757}
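
/* Illustrative sketch, not driver code: for bit_width < 32 the loop above
 * is equivalent to the closed form
 *
 *	mask = (1U << bit_width) - 1;
 *
 * the loop form is kept because it also yields 0xffffffff for
 * bit_width == 32 without relying on an undefined 32-bit shift.
 */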
1758
1759/**
1760 * cik_get_rb_disabled - compute the bitmask of disabled RBs
1761 *
1762 * @rdev: radeon_device pointer
1763 * @max_rb_num: max RBs (render backends) for the asic
1764 * @se_num: number of SEs (shader engines) for the asic
1765 * @sh_per_se: number of SH blocks per SE for the asic
1766 *
1767 * Calculates the bitmask of disabled RBs (CIK).
1768 * Returns the disabled RB bitmask.
1769 */
1770static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1771 u32 max_rb_num, u32 se_num,
1772 u32 sh_per_se)
1773{
1774 u32 data, mask;
1775
1776 data = RREG32(CC_RB_BACKEND_DISABLE);
1777 if (data & 1)
1778 data &= BACKEND_DISABLE_MASK;
1779 else
1780 data = 0;
1781 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1782
1783 data >>= BACKEND_DISABLE_SHIFT;
1784
1785 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1786
1787 return data & mask;
1788}
1789
1790/**
1791 * cik_setup_rb - setup the RBs on the asic
1792 *
1793 * @rdev: radeon_device pointer
1794 * @se_num: number of SEs (shader engines) for the asic
1795 * @sh_per_se: number of SH blocks per SE for the asic
1796 * @max_rb_num: max RBs (render backends) for the asic
1797 *
1798 * Configures per-SE/SH RB registers (CIK).
1799 */
1800static void cik_setup_rb(struct radeon_device *rdev,
1801 u32 se_num, u32 sh_per_se,
1802 u32 max_rb_num)
1803{
1804 int i, j;
1805 u32 data, mask;
1806 u32 disabled_rbs = 0;
1807 u32 enabled_rbs = 0;
1808
1809 for (i = 0; i < se_num; i++) {
1810 for (j = 0; j < sh_per_se; j++) {
1811 cik_select_se_sh(rdev, i, j);
1812 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1813 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1814 }
1815 }
1816 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1817
1818 mask = 1;
1819 for (i = 0; i < max_rb_num; i++) {
1820 if (!(disabled_rbs & mask))
1821 enabled_rbs |= mask;
1822 mask <<= 1;
1823 }
1824
1825 for (i = 0; i < se_num; i++) {
1826 cik_select_se_sh(rdev, i, 0xffffffff);
1827 data = 0;
1828 for (j = 0; j < sh_per_se; j++) {
1829 switch (enabled_rbs & 3) {
1830 case 1:
1831 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1832 break;
1833 case 2:
1834 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1835 break;
1836 case 3:
1837 default:
1838 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1839 break;
1840 }
1841 enabled_rbs >>= 2;
1842 }
1843 WREG32(PA_SC_RASTER_CONFIG, data);
1844 }
1845 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1846}
1847
1848/**
1849 * cik_gpu_init - setup the 3D engine
1850 *
1851 * @rdev: radeon_device pointer
1852 *
1853 * Configures the 3D engine and tiling configuration
1854 * registers so that the 3D engine is usable.
1855 */
1856static void cik_gpu_init(struct radeon_device *rdev)
1857{
1858 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1859 u32 mc_shared_chmap, mc_arb_ramcfg;
1860 u32 hdp_host_path_cntl;
1861 u32 tmp;
1862 int i, j;
1863
1864 switch (rdev->family) {
1865 case CHIP_BONAIRE:
1866 rdev->config.cik.max_shader_engines = 2;
1867 rdev->config.cik.max_tile_pipes = 4;
1868 rdev->config.cik.max_cu_per_sh = 7;
1869 rdev->config.cik.max_sh_per_se = 1;
1870 rdev->config.cik.max_backends_per_se = 2;
1871 rdev->config.cik.max_texture_channel_caches = 4;
1872 rdev->config.cik.max_gprs = 256;
1873 rdev->config.cik.max_gs_threads = 32;
1874 rdev->config.cik.max_hw_contexts = 8;
1875
1876 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1877 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1878 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1879 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1880 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1881 break;
1882 case CHIP_KAVERI:
1883 /* TODO */
1884 break;
1885 case CHIP_KABINI:
1886 default:
1887 rdev->config.cik.max_shader_engines = 1;
1888 rdev->config.cik.max_tile_pipes = 2;
1889 rdev->config.cik.max_cu_per_sh = 2;
1890 rdev->config.cik.max_sh_per_se = 1;
1891 rdev->config.cik.max_backends_per_se = 1;
1892 rdev->config.cik.max_texture_channel_caches = 2;
1893 rdev->config.cik.max_gprs = 256;
1894 rdev->config.cik.max_gs_threads = 16;
1895 rdev->config.cik.max_hw_contexts = 8;
1896
1897 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1898 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1899 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1900 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1901 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1902 break;
1903 }
1904
1905 /* Initialize HDP */
1906 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1907 WREG32((0x2c14 + j), 0x00000000);
1908 WREG32((0x2c18 + j), 0x00000000);
1909 WREG32((0x2c1c + j), 0x00000000);
1910 WREG32((0x2c20 + j), 0x00000000);
1911 WREG32((0x2c24 + j), 0x00000000);
1912 }
1913
1914 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1915
1916 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1917
1918 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1919 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1920
1921 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1922 rdev->config.cik.mem_max_burst_length_bytes = 256;
1923 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1924 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1925 if (rdev->config.cik.mem_row_size_in_kb > 4)
1926 rdev->config.cik.mem_row_size_in_kb = 4;
1927 /* XXX use MC settings? */
1928 rdev->config.cik.shader_engine_tile_size = 32;
1929 rdev->config.cik.num_gpus = 1;
1930 rdev->config.cik.multi_gpu_tile_size = 64;
1931
1932 /* fix up row size */
1933 gb_addr_config &= ~ROW_SIZE_MASK;
1934 switch (rdev->config.cik.mem_row_size_in_kb) {
1935 case 1:
1936 default:
1937 gb_addr_config |= ROW_SIZE(0);
1938 break;
1939 case 2:
1940 gb_addr_config |= ROW_SIZE(1);
1941 break;
1942 case 4:
1943 gb_addr_config |= ROW_SIZE(2);
1944 break;
1945 }
1946
1947 /* setup tiling info dword. gb_addr_config is not adequate since it does
1948 * not have bank info, so create a custom tiling dword.
1949 * bits 3:0 num_pipes
1950 * bits 7:4 num_banks
1951 * bits 11:8 group_size
1952 * bits 15:12 row_size
1953 */
1954 rdev->config.cik.tile_config = 0;
1955 switch (rdev->config.cik.num_tile_pipes) {
1956 case 1:
1957 rdev->config.cik.tile_config |= (0 << 0);
1958 break;
1959 case 2:
1960 rdev->config.cik.tile_config |= (1 << 0);
1961 break;
1962 case 4:
1963 rdev->config.cik.tile_config |= (2 << 0);
1964 break;
1965 case 8:
1966 default:
1967 /* XXX what about 12? */
1968 rdev->config.cik.tile_config |= (3 << 0);
1969 break;
1970 }
1971 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1972 rdev->config.cik.tile_config |= 1 << 4;
1973 else
1974 rdev->config.cik.tile_config |= 0 << 4;
1975 rdev->config.cik.tile_config |=
1976 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1977 rdev->config.cik.tile_config |=
1978 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1979
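	/* Hypothetical decode, for illustration only (no such helper exists in
	 * the driver): the fields packed into tile_config above can be read
	 * back roughly as
	 *
	 *	num_pipes = 1 << (tile_config & 0xf);
	 *	row_size  = 1024 << ((tile_config >> 12) & 0xf);
	 *
	 * num_pipes follows the switch above (0/1/2/3 for 1/2/4/8 pipes) and
	 * row_size follows the ROW_SIZE() mapping (0/1/2 for 1/2/4 KB rows);
	 * the bank and group_size fields are stored as raw register values.
	 */
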
1980 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1981 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1982 WREG32(DMIF_ADDR_CALC, gb_addr_config);
Alex Deucher21a93e12013-04-09 12:47:11 -04001983 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1984 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
Christian König87167bb2013-04-09 13:39:21 -04001985 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1986 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1987 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
Alex Deucher8cc1a532013-04-09 12:41:24 -04001988
1989 cik_tiling_mode_table_init(rdev);
1990
1991 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1992 rdev->config.cik.max_sh_per_se,
1993 rdev->config.cik.max_backends_per_se);
1994
1995 /* set HW defaults for 3D engine */
1996 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1997
1998 WREG32(SX_DEBUG_1, 0x20);
1999
2000 WREG32(TA_CNTL_AUX, 0x00010000);
2001
2002 tmp = RREG32(SPI_CONFIG_CNTL);
2003 tmp |= 0x03000000;
2004 WREG32(SPI_CONFIG_CNTL, tmp);
2005
2006 WREG32(SQ_CONFIG, 1);
2007
2008 WREG32(DB_DEBUG, 0);
2009
2010 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2011 tmp |= 0x00000400;
2012 WREG32(DB_DEBUG2, tmp);
2013
2014 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2015 tmp |= 0x00020200;
2016 WREG32(DB_DEBUG3, tmp);
2017
2018 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2019 tmp |= 0x00018208;
2020 WREG32(CB_HW_CONTROL, tmp);
2021
2022 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2023
2024 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2025 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2026 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2027 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2028
2029 WREG32(VGT_NUM_INSTANCES, 1);
2030
2031 WREG32(CP_PERFMON_CNTL, 0);
2032
2033 WREG32(SQ_CONFIG, 0);
2034
2035 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2036 FORCE_EOV_MAX_REZ_CNT(255)));
2037
2038 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2039 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2040
2041 WREG32(VGT_GS_VERTEX_REUSE, 16);
2042 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2043
2044 tmp = RREG32(HDP_MISC_CNTL);
2045 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2046 WREG32(HDP_MISC_CNTL, tmp);
2047
2048 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2049 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2050
2051 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2052 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2053
2054 udelay(50);
2055}
2056
Alex Deucher841cf442012-12-18 21:47:44 -05002057/*
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002058 * GPU scratch register helper functions.
2059 */
2060/**
2061 * cik_scratch_init - setup driver info for CP scratch regs
2062 *
2063 * @rdev: radeon_device pointer
2064 *
2065 * Set up the number and offset of the CP scratch registers.
2066 * NOTE: use of CP scratch registers is a legacy interface and
2067 * is not used by default on newer asics (r6xx+). On newer asics,
2068 * memory buffers are used for fences rather than scratch regs.
2069 */
2070static void cik_scratch_init(struct radeon_device *rdev)
2071{
2072 int i;
2073
2074 rdev->scratch.num_reg = 7;
2075 rdev->scratch.reg_base = SCRATCH_REG0;
2076 for (i = 0; i < rdev->scratch.num_reg; i++) {
2077 rdev->scratch.free[i] = true;
2078 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2079 }
2080}
2081
2082/**
Alex Deucherfbc832c2012-07-20 14:41:35 -04002083 * cik_ring_test - basic gfx ring test
2084 *
2085 * @rdev: radeon_device pointer
2086 * @ring: radeon_ring structure holding ring information
2087 *
2088 * Allocate a scratch register and write to it using the gfx ring (CIK).
2089 * Provides a basic gfx ring test to verify that the ring is working.
2090 * Used by cik_cp_gfx_resume();
2091 * Returns 0 on success, error on failure.
2092 */
2093int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2094{
2095 uint32_t scratch;
2096 uint32_t tmp = 0;
2097 unsigned i;
2098 int r;
2099
2100 r = radeon_scratch_get(rdev, &scratch);
2101 if (r) {
2102 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2103 return r;
2104 }
2105 WREG32(scratch, 0xCAFEDEAD);
2106 r = radeon_ring_lock(rdev, ring, 3);
2107 if (r) {
2108 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2109 radeon_scratch_free(rdev, scratch);
2110 return r;
2111 }
2112 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2113 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2114 radeon_ring_write(ring, 0xDEADBEEF);
2115 radeon_ring_unlock_commit(rdev, ring);
Alex Deucher963e81f2013-06-26 17:37:11 -04002116
Alex Deucherfbc832c2012-07-20 14:41:35 -04002117 for (i = 0; i < rdev->usec_timeout; i++) {
2118 tmp = RREG32(scratch);
2119 if (tmp == 0xDEADBEEF)
2120 break;
2121 DRM_UDELAY(1);
2122 }
2123 if (i < rdev->usec_timeout) {
2124 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2125 } else {
2126 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2127 ring->idx, scratch, tmp);
2128 r = -EINVAL;
2129 }
2130 radeon_scratch_free(rdev, scratch);
2131 return r;
2132}
2133
2134/**
Alex Deucherb07fdd32013-04-11 09:36:17 -04002135 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002136 *
2137 * @rdev: radeon_device pointer
2138 * @fence: radeon fence object
2139 *
2140 * Emits a fence sequence number on the gfx ring and flushes
2141 * GPU caches.
2142 */
Alex Deucherb07fdd32013-04-11 09:36:17 -04002143void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2144 struct radeon_fence *fence)
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002145{
2146 struct radeon_ring *ring = &rdev->ring[fence->ring];
2147 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2148
2149 /* EVENT_WRITE_EOP - flush caches, send int */
2150 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2151 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2152 EOP_TC_ACTION_EN |
2153 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2154 EVENT_INDEX(5)));
2155 radeon_ring_write(ring, addr & 0xfffffffc);
2156 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2157 radeon_ring_write(ring, fence->seq);
2158 radeon_ring_write(ring, 0);
2159 /* HDP flush */
2160 /* We should be using the new WAIT_REG_MEM special op packet here
2161 * but it causes the CP to hang
2162 */
2163 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2164 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2165 WRITE_DATA_DST_SEL(0)));
2166 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2167 radeon_ring_write(ring, 0);
2168 radeon_ring_write(ring, 0);
2169}
2170
Alex Deucherb07fdd32013-04-11 09:36:17 -04002171/**
2172 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2173 *
2174 * @rdev: radeon_device pointer
2175 * @fence: radeon fence object
2176 *
2177 * Emits a fence sequence number on the compute ring and flushes
2178 * GPU caches.
2179 */
2180void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2181 struct radeon_fence *fence)
2182{
2183 struct radeon_ring *ring = &rdev->ring[fence->ring];
2184 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2185
2186 /* RELEASE_MEM - flush caches, send int */
2187 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2188 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2189 EOP_TC_ACTION_EN |
2190 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2191 EVENT_INDEX(5)));
2192 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2193 radeon_ring_write(ring, addr & 0xfffffffc);
2194 radeon_ring_write(ring, upper_32_bits(addr));
2195 radeon_ring_write(ring, fence->seq);
2196 radeon_ring_write(ring, 0);
2197 /* HDP flush */
2198 /* We should be using the new WAIT_REG_MEM special op packet here
2199 * but it causes the CP to hang
2200 */
2201 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2202 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2203 WRITE_DATA_DST_SEL(0)));
2204 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2205 radeon_ring_write(ring, 0);
2206 radeon_ring_write(ring, 0);
2207}
2208
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002209void cik_semaphore_ring_emit(struct radeon_device *rdev,
2210 struct radeon_ring *ring,
2211 struct radeon_semaphore *semaphore,
2212 bool emit_wait)
2213{
2214 uint64_t addr = semaphore->gpu_addr;
2215 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2216
2217 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2218 radeon_ring_write(ring, addr & 0xffffffff);
2219 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2220}
2221
2222/*
2223 * IB stuff
2224 */
2225/**
2226 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2227 *
2228 * @rdev: radeon_device pointer
2229 * @ib: radeon indirect buffer object
2230 *
2231 * Emits a DE (drawing engine) or CE (constant engine) IB
2232 * on the gfx ring. IBs are usually generated by userspace
2233 * acceleration drivers and submitted to the kernel for
2234 * scheduling on the ring. This function schedules the IB
2235 * on the gfx ring for execution by the GPU.
2236 */
2237void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2238{
2239 struct radeon_ring *ring = &rdev->ring[ib->ring];
2240 u32 header, control = INDIRECT_BUFFER_VALID;
2241
2242 if (ib->is_const_ib) {
2243 /* set switch buffer packet before const IB */
2244 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2245 radeon_ring_write(ring, 0);
2246
2247 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2248 } else {
2249 u32 next_rptr;
2250 if (ring->rptr_save_reg) {
2251 next_rptr = ring->wptr + 3 + 4;
2252 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2253 radeon_ring_write(ring, ((ring->rptr_save_reg -
2254 PACKET3_SET_UCONFIG_REG_START) >> 2));
2255 radeon_ring_write(ring, next_rptr);
2256 } else if (rdev->wb.enabled) {
2257 next_rptr = ring->wptr + 5 + 4;
2258 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2259 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2260 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2261 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2262 radeon_ring_write(ring, next_rptr);
2263 }
2264
2265 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2266 }
2267
2268 control |= ib->length_dw |
2269 (ib->vm ? (ib->vm->id << 24) : 0);
2270
2271 radeon_ring_write(ring, header);
2272 radeon_ring_write(ring,
2273#ifdef __BIG_ENDIAN
2274 (2 << 0) |
2275#endif
2276 (ib->gpu_addr & 0xFFFFFFFC));
2277 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2278 radeon_ring_write(ring, control);
2279}
2280
Alex Deucherfbc832c2012-07-20 14:41:35 -04002281/**
2282 * cik_ib_test - basic gfx ring IB test
2283 *
2284 * @rdev: radeon_device pointer
2285 * @ring: radeon_ring structure holding ring information
2286 *
2287 * Allocate an IB and execute it on the gfx ring (CIK).
2288 * Provides a basic gfx ring test to verify that IBs are working.
2289 * Returns 0 on success, error on failure.
2290 */
2291int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2292{
2293 struct radeon_ib ib;
2294 uint32_t scratch;
2295 uint32_t tmp = 0;
2296 unsigned i;
2297 int r;
2298
2299 r = radeon_scratch_get(rdev, &scratch);
2300 if (r) {
2301 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2302 return r;
2303 }
2304 WREG32(scratch, 0xCAFEDEAD);
2305 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2306 if (r) {
2307 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
2308 		return r;
2309 }
2310 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2311 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2312 ib.ptr[2] = 0xDEADBEEF;
2313 ib.length_dw = 3;
2314 r = radeon_ib_schedule(rdev, &ib, NULL);
2315 if (r) {
2316 radeon_scratch_free(rdev, scratch);
2317 radeon_ib_free(rdev, &ib);
2318 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2319 return r;
2320 }
2321 r = radeon_fence_wait(ib.fence, false);
2322 if (r) {
2323 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
2324 		return r;
2325 }
2326 for (i = 0; i < rdev->usec_timeout; i++) {
2327 tmp = RREG32(scratch);
2328 if (tmp == 0xDEADBEEF)
2329 break;
2330 DRM_UDELAY(1);
2331 }
2332 if (i < rdev->usec_timeout) {
2333 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2334 } else {
2335 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2336 scratch, tmp);
2337 r = -EINVAL;
2338 }
2339 radeon_scratch_free(rdev, scratch);
2340 radeon_ib_free(rdev, &ib);
2341 return r;
2342}
2343
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002344/*
Alex Deucher841cf442012-12-18 21:47:44 -05002345 * CP.
2346 * On CIK, gfx and compute now have independent command processors.
2347 *
2348 * GFX
2349 * Gfx consists of a single ring and can process both gfx jobs and
2350 * compute jobs. The gfx CP consists of three microengines (ME):
2351 * PFP - Pre-Fetch Parser
2352 * ME - Micro Engine
2353 * CE - Constant Engine
2354 * The PFP and ME make up what is considered the Drawing Engine (DE).
2355 * The CE is an asynchronous engine used for updating buffer descriptors
2356 * used by the DE so that they can be loaded into cache in parallel
2357 * while the DE is processing state update packets.
2358 *
2359 * Compute
2360 * The compute CP consists of two microengines (ME):
2361 * MEC1 - Compute MicroEngine 1
2362 * MEC2 - Compute MicroEngine 2
2363 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2364 * The queues are exposed to userspace and are programmed directly
2365 * by the compute runtime.
2366 */
2367/**
2368 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2369 *
2370 * @rdev: radeon_device pointer
2371 * @enable: enable or disable the MEs
2372 *
2373 * Halts or unhalts the gfx MEs.
2374 */
2375static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2376{
2377 if (enable)
2378 WREG32(CP_ME_CNTL, 0);
2379 else {
2380 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2381 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2382 }
2383 udelay(50);
2384}
2385
2386/**
2387 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2388 *
2389 * @rdev: radeon_device pointer
2390 *
2391 * Loads the gfx PFP, ME, and CE ucode.
2392 * Returns 0 for success, -EINVAL if the ucode is not available.
2393 */
2394static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2395{
2396 const __be32 *fw_data;
2397 int i;
2398
2399 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2400 return -EINVAL;
2401
2402 cik_cp_gfx_enable(rdev, false);
2403
2404 /* PFP */
2405 fw_data = (const __be32 *)rdev->pfp_fw->data;
2406 WREG32(CP_PFP_UCODE_ADDR, 0);
2407 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2408 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2409 WREG32(CP_PFP_UCODE_ADDR, 0);
2410
2411 /* CE */
2412 fw_data = (const __be32 *)rdev->ce_fw->data;
2413 WREG32(CP_CE_UCODE_ADDR, 0);
2414 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2415 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2416 WREG32(CP_CE_UCODE_ADDR, 0);
2417
2418 /* ME */
2419 fw_data = (const __be32 *)rdev->me_fw->data;
2420 WREG32(CP_ME_RAM_WADDR, 0);
2421 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2422 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2423 WREG32(CP_ME_RAM_WADDR, 0);
2424
2425 WREG32(CP_PFP_UCODE_ADDR, 0);
2426 WREG32(CP_CE_UCODE_ADDR, 0);
2427 WREG32(CP_ME_RAM_WADDR, 0);
2428 WREG32(CP_ME_RAM_RADDR, 0);
2429 return 0;
2430}
2431
2432/**
2433 * cik_cp_gfx_start - start the gfx ring
2434 *
2435 * @rdev: radeon_device pointer
2436 *
2437 * Enables the ring and loads the clear state context and other
2438 * packets required to init the ring.
2439 * Returns 0 for success, error for failure.
2440 */
2441static int cik_cp_gfx_start(struct radeon_device *rdev)
2442{
2443 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2444 int r, i;
2445
2446 /* init the CP */
2447 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2448 WREG32(CP_ENDIAN_SWAP, 0);
2449 WREG32(CP_DEVICE_ID, 1);
2450
2451 cik_cp_gfx_enable(rdev, true);
2452
2453 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2454 if (r) {
2455 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2456 return r;
2457 }
2458
2459 /* init the CE partitions. CE only used for gfx on CIK */
2460 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2461 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2462 radeon_ring_write(ring, 0xc000);
2463 radeon_ring_write(ring, 0xc000);
2464
2465 /* setup clear context state */
2466 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2467 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2468
2469 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2470 radeon_ring_write(ring, 0x80000000);
2471 radeon_ring_write(ring, 0x80000000);
2472
2473 for (i = 0; i < cik_default_size; i++)
2474 radeon_ring_write(ring, cik_default_state[i]);
2475
2476 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2477 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2478
2479 /* set clear context state */
2480 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2481 radeon_ring_write(ring, 0);
2482
2483 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2484 radeon_ring_write(ring, 0x00000316);
2485 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2486 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2487
2488 radeon_ring_unlock_commit(rdev, ring);
2489
2490 return 0;
2491}
2492
2493/**
2494 * cik_cp_gfx_fini - stop the gfx ring
2495 *
2496 * @rdev: radeon_device pointer
2497 *
2498 * Stop the gfx ring and tear down the driver ring
2499 * info.
2500 */
2501static void cik_cp_gfx_fini(struct radeon_device *rdev)
2502{
2503 cik_cp_gfx_enable(rdev, false);
2504 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2505}
2506
2507/**
2508 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2509 *
2510 * @rdev: radeon_device pointer
2511 *
2512 * Program the location and size of the gfx ring buffer
2513 * and test it to make sure it's working.
2514 * Returns 0 for success, error for failure.
2515 */
2516static int cik_cp_gfx_resume(struct radeon_device *rdev)
2517{
2518 struct radeon_ring *ring;
2519 u32 tmp;
2520 u32 rb_bufsz;
2521 u64 rb_addr;
2522 int r;
2523
2524 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2525 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2526
2527 /* Set the write pointer delay */
2528 WREG32(CP_RB_WPTR_DELAY, 0);
2529
2530 /* set the RB to use vmid 0 */
2531 WREG32(CP_RB_VMID, 0);
2532
2533 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2534
2535 /* ring 0 - compute and gfx */
2536 /* Set ring buffer size */
2537 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2538 rb_bufsz = drm_order(ring->ring_size / 8);
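	/* Worked example, assuming drm_order() returns the base-2 log of its
	 * (power-of-two) argument: a 1 MB ring gives drm_order(1048576 / 8) =
	 * drm_order(131072) = 17, i.e. the register encodes the ring size as
	 * a power of two in 8-byte units.
	 */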
2539 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2540#ifdef __BIG_ENDIAN
2541 tmp |= BUF_SWAP_32BIT;
2542#endif
2543 WREG32(CP_RB0_CNTL, tmp);
2544
2545 /* Initialize the ring buffer's read and write pointers */
2546 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2547 ring->wptr = 0;
2548 WREG32(CP_RB0_WPTR, ring->wptr);
2549
2550 	/* set the wb address whether it's enabled or not */
2551 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2552 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2553
2554 /* scratch register shadowing is no longer supported */
2555 WREG32(SCRATCH_UMSK, 0);
2556
2557 if (!rdev->wb.enabled)
2558 tmp |= RB_NO_UPDATE;
2559
2560 mdelay(1);
2561 WREG32(CP_RB0_CNTL, tmp);
2562
2563 rb_addr = ring->gpu_addr >> 8;
2564 WREG32(CP_RB0_BASE, rb_addr);
2565 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2566
2567 ring->rptr = RREG32(CP_RB0_RPTR);
2568
2569 /* start the ring */
2570 cik_cp_gfx_start(rdev);
2571 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2572 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2573 if (r) {
2574 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2575 return r;
2576 }
2577 return 0;
2578}
2579
Alex Deucher963e81f2013-06-26 17:37:11 -04002580u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2581 struct radeon_ring *ring)
2582{
2583 u32 rptr;
2584
2587 if (rdev->wb.enabled) {
2588 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2589 } else {
2590 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2591 rptr = RREG32(CP_HQD_PQ_RPTR);
2592 cik_srbm_select(rdev, 0, 0, 0, 0);
2593 }
2594 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2595
2596 return rptr;
2597}
2598
2599u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2600 struct radeon_ring *ring)
2601{
2602 u32 wptr;
2603
2604 if (rdev->wb.enabled) {
2605 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2606 } else {
2607 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2608 wptr = RREG32(CP_HQD_PQ_WPTR);
2609 cik_srbm_select(rdev, 0, 0, 0, 0);
2610 }
2611 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2612
2613 return wptr;
2614}
2615
2616void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2617 struct radeon_ring *ring)
2618{
2619 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2620
2621 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2622 WDOORBELL32(ring->doorbell_offset, wptr);
2623}
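
/* Interpretation, not authoritative: cik_compute_ring_set_wptr() above
 * mirrors the new write pointer into the writeback slot that
 * cik_compute_ring_get_wptr() reads when rdev->wb.enabled, then rings the
 * queue's doorbell so the compute microengine notices the update without
 * the driver writing CP_HQD_PQ_WPTR directly.
 */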
2624
Alex Deucher841cf442012-12-18 21:47:44 -05002625/**
2626 * cik_cp_compute_enable - enable/disable the compute CP MEs
2627 *
2628 * @rdev: radeon_device pointer
2629 * @enable: enable or disable the MEs
2630 *
2631 * Halts or unhalts the compute MEs.
2632 */
2633static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2634{
2635 if (enable)
2636 WREG32(CP_MEC_CNTL, 0);
2637 else
2638 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2639 udelay(50);
2640}
2641
2642/**
2643 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2644 *
2645 * @rdev: radeon_device pointer
2646 *
2647 * Loads the compute MEC1&2 ucode.
2648 * Returns 0 for success, -EINVAL if the ucode is not available.
2649 */
2650static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2651{
2652 const __be32 *fw_data;
2653 int i;
2654
2655 if (!rdev->mec_fw)
2656 return -EINVAL;
2657
2658 cik_cp_compute_enable(rdev, false);
2659
2660 /* MEC1 */
2661 fw_data = (const __be32 *)rdev->mec_fw->data;
2662 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2663 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2664 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2665 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2666
2667 if (rdev->family == CHIP_KAVERI) {
2668 /* MEC2 */
2669 fw_data = (const __be32 *)rdev->mec_fw->data;
2670 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2671 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2672 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2673 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2674 }
2675
2676 return 0;
2677}
2678
2679/**
2680 * cik_cp_compute_start - start the compute queues
2681 *
2682 * @rdev: radeon_device pointer
2683 *
2684 * Enable the compute queues.
2685 * Returns 0 for success, error for failure.
2686 */
2687static int cik_cp_compute_start(struct radeon_device *rdev)
2688{
Alex Deucher963e81f2013-06-26 17:37:11 -04002689 cik_cp_compute_enable(rdev, true);
2690
Alex Deucher841cf442012-12-18 21:47:44 -05002691 return 0;
2692}
2693
2694/**
2695 * cik_cp_compute_fini - stop the compute queues
2696 *
2697 * @rdev: radeon_device pointer
2698 *
2699 * Stop the compute queues and tear down the driver queue
2700 * info.
2701 */
2702static void cik_cp_compute_fini(struct radeon_device *rdev)
2703{
Alex Deucher963e81f2013-06-26 17:37:11 -04002704 int i, idx, r;
2705
Alex Deucher841cf442012-12-18 21:47:44 -05002706 cik_cp_compute_enable(rdev, false);
Alex Deucher963e81f2013-06-26 17:37:11 -04002707
2708 for (i = 0; i < 2; i++) {
2709 if (i == 0)
2710 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2711 else
2712 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2713
2714 if (rdev->ring[idx].mqd_obj) {
2715 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2716 if (unlikely(r != 0))
2717 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2718
2719 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2720 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2721
2722 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2723 rdev->ring[idx].mqd_obj = NULL;
2724 }
2725 }
Alex Deucher841cf442012-12-18 21:47:44 -05002726}
2727
Alex Deucher963e81f2013-06-26 17:37:11 -04002728static void cik_mec_fini(struct radeon_device *rdev)
2729{
2730 int r;
2731
2732 if (rdev->mec.hpd_eop_obj) {
2733 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2734 if (unlikely(r != 0))
2735 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2736 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2737 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2738
2739 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2740 rdev->mec.hpd_eop_obj = NULL;
2741 }
2742}
2743
2744#define MEC_HPD_SIZE 2048
2745
2746static int cik_mec_init(struct radeon_device *rdev)
2747{
2748 int r;
2749 u32 *hpd;
2750
2751 /*
2752 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2753 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2754 */
2755 if (rdev->family == CHIP_KAVERI)
2756 rdev->mec.num_mec = 2;
2757 else
2758 rdev->mec.num_mec = 1;
2759 rdev->mec.num_pipe = 4;
2760 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2761
2762 if (rdev->mec.hpd_eop_obj == NULL) {
2763 r = radeon_bo_create(rdev,
2764 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2765 PAGE_SIZE, true,
2766 RADEON_GEM_DOMAIN_GTT, NULL,
2767 &rdev->mec.hpd_eop_obj);
2768 if (r) {
2769 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2770 return r;
2771 }
2772 }
2773
2774 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2775 if (unlikely(r != 0)) {
2776 cik_mec_fini(rdev);
2777 return r;
2778 }
2779 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2780 &rdev->mec.hpd_eop_gpu_addr);
2781 if (r) {
2782 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2783 cik_mec_fini(rdev);
2784 return r;
2785 }
2786 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2787 if (r) {
2788 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2789 cik_mec_fini(rdev);
2790 return r;
2791 }
2792
2793 /* clear memory. Not sure if this is required or not */
2794 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2795
2796 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2797 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2798
2799 return 0;
2800}
2801
2802struct hqd_registers
2803{
2804 u32 cp_mqd_base_addr;
2805 u32 cp_mqd_base_addr_hi;
2806 u32 cp_hqd_active;
2807 u32 cp_hqd_vmid;
2808 u32 cp_hqd_persistent_state;
2809 u32 cp_hqd_pipe_priority;
2810 u32 cp_hqd_queue_priority;
2811 u32 cp_hqd_quantum;
2812 u32 cp_hqd_pq_base;
2813 u32 cp_hqd_pq_base_hi;
2814 u32 cp_hqd_pq_rptr;
2815 u32 cp_hqd_pq_rptr_report_addr;
2816 u32 cp_hqd_pq_rptr_report_addr_hi;
2817 u32 cp_hqd_pq_wptr_poll_addr;
2818 u32 cp_hqd_pq_wptr_poll_addr_hi;
2819 u32 cp_hqd_pq_doorbell_control;
2820 u32 cp_hqd_pq_wptr;
2821 u32 cp_hqd_pq_control;
2822 u32 cp_hqd_ib_base_addr;
2823 u32 cp_hqd_ib_base_addr_hi;
2824 u32 cp_hqd_ib_rptr;
2825 u32 cp_hqd_ib_control;
2826 u32 cp_hqd_iq_timer;
2827 u32 cp_hqd_iq_rptr;
2828 u32 cp_hqd_dequeue_request;
2829 u32 cp_hqd_dma_offload;
2830 u32 cp_hqd_sema_cmd;
2831 u32 cp_hqd_msg_type;
2832 u32 cp_hqd_atomic0_preop_lo;
2833 u32 cp_hqd_atomic0_preop_hi;
2834 u32 cp_hqd_atomic1_preop_lo;
2835 u32 cp_hqd_atomic1_preop_hi;
2836 u32 cp_hqd_hq_scheduler0;
2837 u32 cp_hqd_hq_scheduler1;
2838 u32 cp_mqd_control;
2839};
2840
2841struct bonaire_mqd
2842{
2843 u32 header;
2844 u32 dispatch_initiator;
2845 u32 dimensions[3];
2846 u32 start_idx[3];
2847 u32 num_threads[3];
2848 u32 pipeline_stat_enable;
2849 u32 perf_counter_enable;
2850 u32 pgm[2];
2851 u32 tba[2];
2852 u32 tma[2];
2853 u32 pgm_rsrc[2];
2854 u32 vmid;
2855 u32 resource_limits;
2856 u32 static_thread_mgmt01[2];
2857 u32 tmp_ring_size;
2858 u32 static_thread_mgmt23[2];
2859 u32 restart[3];
2860 u32 thread_trace_enable;
2861 u32 reserved1;
2862 u32 user_data[16];
2863 u32 vgtcs_invoke_count[2];
2864 struct hqd_registers queue_state;
2865 u32 dequeue_cntr;
2866 u32 interrupt_queue[64];
2867};
2868
Alex Deucher841cf442012-12-18 21:47:44 -05002869/**
2870 * cik_cp_compute_resume - setup the compute queue registers
2871 *
2872 * @rdev: radeon_device pointer
2873 *
2874 * Program the compute queues and test them to make sure they
2875 * are working.
2876 * Returns 0 for success, error for failure.
2877 */
2878static int cik_cp_compute_resume(struct radeon_device *rdev)
2879{
Alex Deucher963e81f2013-06-26 17:37:11 -04002880 int r, i, j, idx;
2881 u32 tmp;
2882 bool use_doorbell = true;
2883 u64 hqd_gpu_addr;
2884 u64 mqd_gpu_addr;
2885 u64 eop_gpu_addr;
2886 u64 wb_gpu_addr;
2887 u32 *buf;
2888 struct bonaire_mqd *mqd;
Alex Deucher841cf442012-12-18 21:47:44 -05002889
Alex Deucher841cf442012-12-18 21:47:44 -05002890 r = cik_cp_compute_start(rdev);
2891 if (r)
2892 return r;
Alex Deucher963e81f2013-06-26 17:37:11 -04002893
2894 /* fix up chicken bits */
2895 tmp = RREG32(CP_CPF_DEBUG);
2896 tmp |= (1 << 23);
2897 WREG32(CP_CPF_DEBUG, tmp);
2898
2899 /* init the pipes */
2900 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2901 int me = (i < 4) ? 1 : 2;
2902 int pipe = (i < 4) ? i : (i - 4);
2903
2904 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2905
2906 cik_srbm_select(rdev, me, pipe, 0, 0);
2907
2908 /* write the EOP addr */
2909 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2910 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2911
2912 /* set the VMID assigned */
2913 WREG32(CP_HPD_EOP_VMID, 0);
2914
2915 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2916 tmp = RREG32(CP_HPD_EOP_CONTROL);
2917 tmp &= ~EOP_SIZE_MASK;
2918 tmp |= drm_order(MEC_HPD_SIZE / 8);
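		/* Worked arithmetic: MEC_HPD_SIZE is 2048 bytes, i.e. 512
		 * dwords; drm_order(2048 / 8) = drm_order(256) = 8, and
		 * 2^(8+1) = 512 dwords, matching the EOP buffer size.
		 */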
2919 WREG32(CP_HPD_EOP_CONTROL, tmp);
2920 }
2921 cik_srbm_select(rdev, 0, 0, 0, 0);
2922
2923 /* init the queues. Just two for now. */
2924 for (i = 0; i < 2; i++) {
2925 if (i == 0)
2926 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2927 else
2928 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2929
2930 if (rdev->ring[idx].mqd_obj == NULL) {
2931 r = radeon_bo_create(rdev,
2932 sizeof(struct bonaire_mqd),
2933 PAGE_SIZE, true,
2934 RADEON_GEM_DOMAIN_GTT, NULL,
2935 &rdev->ring[idx].mqd_obj);
2936 if (r) {
2937 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2938 return r;
2939 }
2940 }
2941
2942 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2943 if (unlikely(r != 0)) {
2944 cik_cp_compute_fini(rdev);
2945 return r;
2946 }
2947 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2948 &mqd_gpu_addr);
2949 if (r) {
2950 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2951 cik_cp_compute_fini(rdev);
2952 return r;
2953 }
2954 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2955 if (r) {
2956 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2957 cik_cp_compute_fini(rdev);
2958 return r;
2959 }
2960
2961 /* doorbell offset */
2962 rdev->ring[idx].doorbell_offset =
2963 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
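		/* Likely intent: the "+ 0" selects the first doorbell slot
		 * within this ring's doorbell page; the byte offset is
		 * converted to a dword index for the HQD via
		 * DOORBELL_OFFSET(doorbell_offset / 4) further below.
		 */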
2964
2965 /* init the mqd struct */
2966 memset(buf, 0, sizeof(struct bonaire_mqd));
2967
2968 mqd = (struct bonaire_mqd *)buf;
2969 mqd->header = 0xC0310800;
2970 mqd->static_thread_mgmt01[0] = 0xffffffff;
2971 mqd->static_thread_mgmt01[1] = 0xffffffff;
2972 mqd->static_thread_mgmt23[0] = 0xffffffff;
2973 mqd->static_thread_mgmt23[1] = 0xffffffff;
2974
2975 cik_srbm_select(rdev, rdev->ring[idx].me,
2976 rdev->ring[idx].pipe,
2977 rdev->ring[idx].queue, 0);
2978
2979 /* disable wptr polling */
2980 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2981 tmp &= ~WPTR_POLL_EN;
2982 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2983
2984 /* enable doorbell? */
2985 mqd->queue_state.cp_hqd_pq_doorbell_control =
2986 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2987 if (use_doorbell)
2988 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2989 else
2990 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2991 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2992 mqd->queue_state.cp_hqd_pq_doorbell_control);
2993
2994 /* disable the queue if it's active */
2995 mqd->queue_state.cp_hqd_dequeue_request = 0;
2996 mqd->queue_state.cp_hqd_pq_rptr = 0;
2997 		mqd->queue_state.cp_hqd_pq_wptr = 0;
2998 if (RREG32(CP_HQD_ACTIVE) & 1) {
2999 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3000 			for (j = 0; j < rdev->usec_timeout; j++) {
3001 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3002 break;
3003 udelay(1);
3004 }
3005 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3006 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3007 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3008 }
3009
3010 /* set the pointer to the MQD */
3011 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3012 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3013 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3014 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3015 /* set MQD vmid to 0 */
3016 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3017 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3018 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3019
3020 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3021 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3022 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3023 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3024 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3025 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3026
3027 /* set up the HQD, this is similar to CP_RB0_CNTL */
3028 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3029 mqd->queue_state.cp_hqd_pq_control &=
3030 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3031
3032 mqd->queue_state.cp_hqd_pq_control |=
3033 drm_order(rdev->ring[idx].ring_size / 8);
3034 mqd->queue_state.cp_hqd_pq_control |=
3035 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3036#ifdef __BIG_ENDIAN
3037 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3038#endif
3039 mqd->queue_state.cp_hqd_pq_control &=
3040 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3041 mqd->queue_state.cp_hqd_pq_control |=
3042 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3043 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3044
3045 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3046 if (i == 0)
3047 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3048 else
3049 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3050 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3051 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3052 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3053 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3054 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3055
3056 		/* set the wb address whether it's enabled or not */
3057 if (i == 0)
3058 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3059 else
3060 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3061 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3062 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3063 upper_32_bits(wb_gpu_addr) & 0xffff;
3064 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3065 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3066 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3067 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3068
3069 /* enable the doorbell if requested */
3070 if (use_doorbell) {
3071 mqd->queue_state.cp_hqd_pq_doorbell_control =
3072 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3073 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3074 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3075 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3076 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3077 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3078 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3079
3080 } else {
3081 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3082 }
3083 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3084 mqd->queue_state.cp_hqd_pq_doorbell_control);
3085
3086 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3087 rdev->ring[idx].wptr = 0;
3088 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3089 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3090 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3091 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3092
3093 /* set the vmid for the queue */
3094 mqd->queue_state.cp_hqd_vmid = 0;
3095 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3096
3097 /* activate the queue */
3098 mqd->queue_state.cp_hqd_active = 1;
3099 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3100
3101 cik_srbm_select(rdev, 0, 0, 0, 0);
3102
3103 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3104 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3105
3106 rdev->ring[idx].ready = true;
3107 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3108 if (r)
3109 rdev->ring[idx].ready = false;
3110 }
3111
Alex Deucher841cf442012-12-18 21:47:44 -05003112 return 0;
3113}
3114
Alex Deucher841cf442012-12-18 21:47:44 -05003115static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3116{
3117 cik_cp_gfx_enable(rdev, enable);
3118 cik_cp_compute_enable(rdev, enable);
3119}
3120
Alex Deucher841cf442012-12-18 21:47:44 -05003121static int cik_cp_load_microcode(struct radeon_device *rdev)
3122{
3123 int r;
3124
3125 r = cik_cp_gfx_load_microcode(rdev);
3126 if (r)
3127 return r;
3128 r = cik_cp_compute_load_microcode(rdev);
3129 if (r)
3130 return r;
3131
3132 return 0;
3133}
3134
Alex Deucher841cf442012-12-18 21:47:44 -05003135static void cik_cp_fini(struct radeon_device *rdev)
3136{
3137 cik_cp_gfx_fini(rdev);
3138 cik_cp_compute_fini(rdev);
3139}
3140
Alex Deucher841cf442012-12-18 21:47:44 -05003141static int cik_cp_resume(struct radeon_device *rdev)
3142{
3143 int r;
3144
3145 /* Reset all cp blocks */
3146 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3147 RREG32(GRBM_SOFT_RESET);
3148 mdelay(15);
3149 WREG32(GRBM_SOFT_RESET, 0);
3150 RREG32(GRBM_SOFT_RESET);
3151
3152 r = cik_cp_load_microcode(rdev);
3153 if (r)
3154 return r;
3155
3156 r = cik_cp_gfx_resume(rdev);
3157 if (r)
3158 return r;
3159 r = cik_cp_compute_resume(rdev);
3160 if (r)
3161 return r;
3162
3163 return 0;
3164}
3165
Alex Deucher21a93e12013-04-09 12:47:11 -04003166/*
3167 * sDMA - System DMA
3168 * Starting with CIK, the GPU has new asynchronous
3169 * DMA engines. These engines are used for compute
3170 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3171 * and each one supports 1 ring buffer used for gfx
3172 * and 2 queues used for compute.
3173 *
3174 * The programming model is very similar to the CP
3175 * (ring buffer, IBs, etc.), but sDMA has its own
3176 * packet format that is different from the PM4 format
3177 * used by the CP. sDMA supports copying data, writing
3178 * embedded data, solid fills, and a number of other
3179 * things. It also has support for tiling/detiling of
3180 * buffers.
3181 */
3182/**
3183 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3184 *
3185 * @rdev: radeon_device pointer
3186 * @ib: IB object to schedule
3187 *
3188 * Schedule an IB in the DMA ring (CIK).
3189 */
3190void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3191 struct radeon_ib *ib)
3192{
3193 struct radeon_ring *ring = &rdev->ring[ib->ring];
3194 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3195
3196 if (rdev->wb.enabled) {
3197 u32 next_rptr = ring->wptr + 5;
3198 while ((next_rptr & 7) != 4)
3199 next_rptr++;
3200 next_rptr += 4;
3201 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3202 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3203 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3204 radeon_ring_write(ring, 1); /* number of DWs to follow */
3205 radeon_ring_write(ring, next_rptr);
3206 }
3207
3208 	/* IB packet must end on an 8 DW boundary */
3209 while ((ring->wptr & 7) != 4)
3210 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3211 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3212 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3213 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3214 radeon_ring_write(ring, ib->length_dw);
3215
3216}
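
/* Illustrative sketch of the padding rule above, not driver code: the NOP
 * loop advances wptr to an (8*n + 4) dword position so that the 4-dword
 * INDIRECT_BUFFER packet which follows ends exactly on an 8-dword
 * boundary.  An equivalent closed-form pad count would be
 *
 *	pad = (4 - (ring->wptr & 7)) & 7;	(number of NOPs emitted)
 */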
3217
3218/**
3219 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3220 *
3221 * @rdev: radeon_device pointer
3222 * @fence: radeon fence object
3223 *
3224 * Add a DMA fence packet to the ring to write
3225 * the fence seq number, and a DMA trap packet to generate
3226 * an interrupt if needed (CIK).
3227 */
3228void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3229 struct radeon_fence *fence)
3230{
3231 struct radeon_ring *ring = &rdev->ring[fence->ring];
3232 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3233 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3234 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3235 u32 ref_and_mask;
3236
3237 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3238 ref_and_mask = SDMA0;
3239 else
3240 ref_and_mask = SDMA1;
3241
3242 /* write the fence */
3243 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3244 radeon_ring_write(ring, addr & 0xffffffff);
3245 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3246 radeon_ring_write(ring, fence->seq);
3247 /* generate an interrupt */
3248 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3249 /* flush HDP */
3250 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3251 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3252 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3253 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3254 radeon_ring_write(ring, ref_and_mask); /* MASK */
3255 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3256}
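
/* Interpretation, not authoritative: the POLL_REG_MEM packet above appears
 * to request an HDP flush by writing this engine's ref_and_mask bit to
 * GPU_HDP_FLUSH_REQ and then polling GPU_HDP_FLUSH_DONE until
 * (value & MASK) == REFERENCE, retrying up to RETRY_COUNT times with the
 * given POLL_INTERVAL.
 */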
3257
3258/**
3259 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3260 *
3261 * @rdev: radeon_device pointer
3262 * @ring: radeon_ring structure holding ring information
3263 * @semaphore: radeon semaphore object
3264 * @emit_wait: wait or signal semaphore
3265 *
3266 * Add a DMA semaphore packet to the ring to wait on or signal
3267 * other rings (CIK).
3268 */
3269void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3270 struct radeon_ring *ring,
3271 struct radeon_semaphore *semaphore,
3272 bool emit_wait)
3273{
3274 u64 addr = semaphore->gpu_addr;
3275 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3276
3277 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3278 radeon_ring_write(ring, addr & 0xfffffff8);
3279 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3280}
3281
3282/**
3283 * cik_sdma_gfx_stop - stop the gfx async dma engines
3284 *
3285 * @rdev: radeon_device pointer
3286 *
3287 * Stop the gfx async dma ring buffers (CIK).
3288 */
3289static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3290{
3291 u32 rb_cntl, reg_offset;
3292 int i;
3293
3294 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3295
3296 for (i = 0; i < 2; i++) {
3297 if (i == 0)
3298 reg_offset = SDMA0_REGISTER_OFFSET;
3299 else
3300 reg_offset = SDMA1_REGISTER_OFFSET;
3301 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3302 rb_cntl &= ~SDMA_RB_ENABLE;
3303 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3304 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3305 }
3306}
3307
3308/**
3309 * cik_sdma_rlc_stop - stop the compute async dma engines
3310 *
3311 * @rdev: radeon_device pointer
3312 *
3313 * Stop the compute async dma queues (CIK).
3314 */
3315static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3316{
3317 /* XXX todo */
3318}
3319
3320/**
3321 * cik_sdma_enable - halt or unhalt the async dma engines
3322 *
3323 * @rdev: radeon_device pointer
3324 * @enable: enable/disable the DMA MEs.
3325 *
3326 * Halt or unhalt the async dma engines (CIK).
3327 */
3328static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3329{
3330 u32 me_cntl, reg_offset;
3331 int i;
3332
3333 for (i = 0; i < 2; i++) {
3334 if (i == 0)
3335 reg_offset = SDMA0_REGISTER_OFFSET;
3336 else
3337 reg_offset = SDMA1_REGISTER_OFFSET;
3338 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3339 if (enable)
3340 me_cntl &= ~SDMA_HALT;
3341 else
3342 me_cntl |= SDMA_HALT;
3343 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3344 }
3345}
3346
3347/**
3348 * cik_sdma_gfx_resume - setup and start the async dma engines
3349 *
3350 * @rdev: radeon_device pointer
3351 *
3352 * Set up the gfx DMA ring buffers and enable them (CIK).
3353 * Returns 0 for success, error for failure.
3354 */
3355static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3356{
3357 struct radeon_ring *ring;
3358 u32 rb_cntl, ib_cntl;
3359 u32 rb_bufsz;
3360 u32 reg_offset, wb_offset;
3361 int i, r;
3362
3363 for (i = 0; i < 2; i++) {
3364 if (i == 0) {
3365 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3366 reg_offset = SDMA0_REGISTER_OFFSET;
3367 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3368 } else {
3369 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3370 reg_offset = SDMA1_REGISTER_OFFSET;
3371 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3372 }
3373
3374 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3375 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3376
3377 /* Set ring buffer size in dwords */
3378 rb_bufsz = drm_order(ring->ring_size / 4);
3379 rb_cntl = rb_bufsz << 1;
3380#ifdef __BIG_ENDIAN
3381 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3382#endif
3383 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3384
3385 /* Initialize the ring buffer's read and write pointers */
3386 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3387 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3388
3389 /* set the wb address whether it's enabled or not */
3390 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3391 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3392 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3393 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3394
3395 if (rdev->wb.enabled)
3396 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3397
3398 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3399 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3400
3401 ring->wptr = 0;
3402 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3403
3404 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3405
3406 /* enable DMA RB */
3407 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3408
3409 ib_cntl = SDMA_IB_ENABLE;
3410#ifdef __BIG_ENDIAN
3411 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3412#endif
3413 /* enable DMA IBs */
3414 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3415
3416 ring->ready = true;
3417
3418 r = radeon_ring_test(rdev, ring->idx, ring);
3419 if (r) {
3420 ring->ready = false;
3421 return r;
3422 }
3423 }
3424
3425 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3426
3427 return 0;
3428}
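/*
 * Worked example (editorial note, not driver code) of the ring buffer
 * size encoding used above: SDMA0_GFX_RB_CNTL takes the log2 of the
 * ring size in dwords, shifted left by one into its size field.
 * Assuming a 256 KiB gfx SDMA ring:
 *
 *	ring->ring_size     = 256 * 1024 bytes
 *	ring->ring_size / 4 = 65536 dwords
 *	drm_order(65536)    = 16
 *	rb_cntl             = 16 << 1 = 0x20
 *
 * SDMA_RPTR_WRITEBACK_ENABLE is OR'd in when writeback is enabled and
 * SDMA_RB_ENABLE when the ring is finally switched on.
 */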
3429
3430/**
3431 * cik_sdma_rlc_resume - setup and start the async dma engines
3432 *
3433 * @rdev: radeon_device pointer
3434 *
3435 * Set up the compute DMA queues and enable them (CIK).
3436 * Returns 0 for success, error for failure.
3437 */
3438static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3439{
3440 /* XXX todo */
3441 return 0;
3442}
3443
3444/**
3445 * cik_sdma_load_microcode - load the sDMA ME ucode
3446 *
3447 * @rdev: radeon_device pointer
3448 *
3449 * Loads the sDMA0/1 ucode.
3450 * Returns 0 for success, -EINVAL if the ucode is not available.
3451 */
3452static int cik_sdma_load_microcode(struct radeon_device *rdev)
3453{
3454 const __be32 *fw_data;
3455 int i;
3456
3457 if (!rdev->sdma_fw)
3458 return -EINVAL;
3459
3460 /* stop the gfx rings and rlc compute queues */
3461 cik_sdma_gfx_stop(rdev);
3462 cik_sdma_rlc_stop(rdev);
3463
3464 /* halt the MEs */
3465 cik_sdma_enable(rdev, false);
3466
3467 /* sdma0 */
3468 fw_data = (const __be32 *)rdev->sdma_fw->data;
3469 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3470 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3471 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3472 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3473
3474 /* sdma1 */
3475 fw_data = (const __be32 *)rdev->sdma_fw->data;
3476 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3477 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3478 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3479 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3480
3481 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3482 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3483 return 0;
3484}
3485
3486/**
3487 * cik_sdma_resume - setup and start the async dma engines
3488 *
3489 * @rdev: radeon_device pointer
3490 *
3491 * Set up the DMA engines and enable them (CIK).
3492 * Returns 0 for success, error for failure.
3493 */
3494static int cik_sdma_resume(struct radeon_device *rdev)
3495{
3496 int r;
3497
3498 /* Reset dma */
3499 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3500 RREG32(SRBM_SOFT_RESET);
3501 udelay(50);
3502 WREG32(SRBM_SOFT_RESET, 0);
3503 RREG32(SRBM_SOFT_RESET);
3504
3505 r = cik_sdma_load_microcode(rdev);
3506 if (r)
3507 return r;
3508
3509 /* unhalt the MEs */
3510 cik_sdma_enable(rdev, true);
3511
3512 /* start the gfx rings and rlc compute queues */
3513 r = cik_sdma_gfx_resume(rdev);
3514 if (r)
3515 return r;
3516 r = cik_sdma_rlc_resume(rdev);
3517 if (r)
3518 return r;
3519
3520 return 0;
3521}
3522
3523/**
3524 * cik_sdma_fini - tear down the async dma engines
3525 *
3526 * @rdev: radeon_device pointer
3527 *
3528 * Stop the async dma engines and free the rings (CIK).
3529 */
3530static void cik_sdma_fini(struct radeon_device *rdev)
3531{
3532 /* stop the gfx rings and rlc compute queues */
3533 cik_sdma_gfx_stop(rdev);
3534 cik_sdma_rlc_stop(rdev);
3535 /* halt the MEs */
3536 cik_sdma_enable(rdev, false);
3537 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3538 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3539 /* XXX - compute dma queue tear down */
3540}
3541
3542/**
3543 * cik_copy_dma - copy pages using the DMA engine
3544 *
3545 * @rdev: radeon_device pointer
3546 * @src_offset: src GPU address
3547 * @dst_offset: dst GPU address
3548 * @num_gpu_pages: number of GPU pages to xfer
3549 * @fence: radeon fence object
3550 *
3551 * Copy GPU pages using the DMA engine (CIK).
3552 * Used by the radeon ttm implementation to move pages if
3553 * registered as the asic copy callback.
3554 */
3555int cik_copy_dma(struct radeon_device *rdev,
3556 uint64_t src_offset, uint64_t dst_offset,
3557 unsigned num_gpu_pages,
3558 struct radeon_fence **fence)
3559{
3560 struct radeon_semaphore *sem = NULL;
3561 int ring_index = rdev->asic->copy.dma_ring_index;
3562 struct radeon_ring *ring = &rdev->ring[ring_index];
3563 u32 size_in_bytes, cur_size_in_bytes;
3564 int i, num_loops;
3565 int r = 0;
3566
3567 r = radeon_semaphore_create(rdev, &sem);
3568 if (r) {
3569 DRM_ERROR("radeon: moving bo (%d).\n", r);
3570 return r;
3571 }
3572
3573 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3574 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3575 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3576 if (r) {
3577 DRM_ERROR("radeon: moving bo (%d).\n", r);
3578 radeon_semaphore_free(rdev, &sem, NULL);
3579 return r;
3580 }
3581
3582 if (radeon_fence_need_sync(*fence, ring->idx)) {
3583 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3584 ring->idx);
3585 radeon_fence_note_sync(*fence, ring->idx);
3586 } else {
3587 radeon_semaphore_free(rdev, &sem, NULL);
3588 }
3589
3590 for (i = 0; i < num_loops; i++) {
3591 cur_size_in_bytes = size_in_bytes;
3592 if (cur_size_in_bytes > 0x1fffff)
3593 cur_size_in_bytes = 0x1fffff;
3594 size_in_bytes -= cur_size_in_bytes;
3595 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3596 radeon_ring_write(ring, cur_size_in_bytes);
3597 radeon_ring_write(ring, 0); /* src/dst endian swap */
3598 radeon_ring_write(ring, src_offset & 0xffffffff);
3599 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3600 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3601 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3602 src_offset += cur_size_in_bytes;
3603 dst_offset += cur_size_in_bytes;
3604 }
3605
3606 r = radeon_fence_emit(rdev, fence, ring->idx);
3607 if (r) {
3608 radeon_ring_unlock_undo(rdev, ring);
3609 return r;
3610 }
3611
3612 radeon_ring_unlock_commit(rdev, ring);
3613 radeon_semaphore_free(rdev, &sem, *fence);
3614
3615 return r;
3616}
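/*
 * Ring-space accounting for cik_copy_dma() above (editorial note, not
 * driver code): each COPY_LINEAR packet is 7 dwords and moves at most
 * 0x1fffff bytes, and the "+ 14" reserves room for the worst case of a
 * 3 dword semaphore sync plus the 11 dword fence/trap/HDP-flush
 * sequence emitted by cik_sdma_fence_ring_emit().  For example, an
 * 8 MiB (2048 page) move needs:
 *
 *	num_loops = DIV_ROUND_UP(8 << 20, 0x1fffff) = 5
 *	dwords    = 5 * 7 + 14 = 49
 */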
3617
3618/**
3619 * cik_sdma_ring_test - simple async dma engine test
3620 *
3621 * @rdev: radeon_device pointer
3622 * @ring: radeon_ring structure holding ring information
3623 *
3624 * Test the DMA engine by using it to write a value to
3625 * memory (CIK).
3626 * Returns 0 for success, error for failure.
3627 */
3628int cik_sdma_ring_test(struct radeon_device *rdev,
3629 struct radeon_ring *ring)
3630{
3631 unsigned i;
3632 int r;
3633 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3634 u32 tmp;
3635
3636 if (!ptr) {
3637 DRM_ERROR("invalid vram scratch pointer\n");
3638 return -EINVAL;
3639 }
3640
3641 tmp = 0xCAFEDEAD;
3642 writel(tmp, ptr);
3643
3644 r = radeon_ring_lock(rdev, ring, 4);
3645 if (r) {
3646 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3647 return r;
3648 }
3649 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3650 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3651 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3652 radeon_ring_write(ring, 1); /* number of DWs to follow */
3653 radeon_ring_write(ring, 0xDEADBEEF);
3654 radeon_ring_unlock_commit(rdev, ring);
3655
3656 for (i = 0; i < rdev->usec_timeout; i++) {
3657 tmp = readl(ptr);
3658 if (tmp == 0xDEADBEEF)
3659 break;
3660 DRM_UDELAY(1);
3661 }
3662
3663 if (i < rdev->usec_timeout) {
3664 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3665 } else {
3666 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3667 ring->idx, tmp);
3668 r = -EINVAL;
3669 }
3670 return r;
3671}
3672
3673/**
3674 * cik_sdma_ib_test - test an IB on the DMA engine
3675 *
3676 * @rdev: radeon_device pointer
3677 * @ring: radeon_ring structure holding ring information
3678 *
3679 * Test a simple IB in the DMA ring (CIK).
3680 * Returns 0 on success, error on failure.
3681 */
3682int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3683{
3684 struct radeon_ib ib;
3685 unsigned i;
3686 int r;
3687 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3688 u32 tmp = 0;
3689
3690 if (!ptr) {
3691 DRM_ERROR("invalid vram scratch pointer\n");
3692 return -EINVAL;
3693 }
3694
3695 tmp = 0xCAFEDEAD;
3696 writel(tmp, ptr);
3697
3698 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3699 if (r) {
3700 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3701 return r;
3702 }
3703
3704 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3705 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3706 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3707 ib.ptr[3] = 1;
3708 ib.ptr[4] = 0xDEADBEEF;
3709 ib.length_dw = 5;
3710
3711 r = radeon_ib_schedule(rdev, &ib, NULL);
3712 if (r) {
3713 radeon_ib_free(rdev, &ib);
3714 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3715 return r;
3716 }
3717 r = radeon_fence_wait(ib.fence, false);
3718 if (r) {
3719 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3720 return r;
3721 }
3722 for (i = 0; i < rdev->usec_timeout; i++) {
3723 tmp = readl(ptr);
3724 if (tmp == 0xDEADBEEF)
3725 break;
3726 DRM_UDELAY(1);
3727 }
3728 if (i < rdev->usec_timeout) {
3729 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3730 } else {
3731 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3732 r = -EINVAL;
3733 }
3734 radeon_ib_free(rdev, &ib);
3735 return r;
3736}
3737
Alex Deuchercc066712013-04-09 12:59:51 -04003738
3739static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3740{
3741 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3742 RREG32(GRBM_STATUS));
3743 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3744 RREG32(GRBM_STATUS2));
3745 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3746 RREG32(GRBM_STATUS_SE0));
3747 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3748 RREG32(GRBM_STATUS_SE1));
3749 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3750 RREG32(GRBM_STATUS_SE2));
3751 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3752 RREG32(GRBM_STATUS_SE3));
3753 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3754 RREG32(SRBM_STATUS));
3755 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3756 RREG32(SRBM_STATUS2));
3757 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3758 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3759 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3760 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
Alex Deucher963e81f2013-06-26 17:37:11 -04003761 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3762 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3763 RREG32(CP_STALLED_STAT1));
3764 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3765 RREG32(CP_STALLED_STAT2));
3766 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3767 RREG32(CP_STALLED_STAT3));
3768 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3769 RREG32(CP_CPF_BUSY_STAT));
3770 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3771 RREG32(CP_CPF_STALLED_STAT1));
3772 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3773 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3774 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3775 RREG32(CP_CPC_STALLED_STAT1));
3776 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
Alex Deuchercc066712013-04-09 12:59:51 -04003777}
3778
Alex Deucher6f2043c2013-04-09 12:43:41 -04003779/**
Alex Deuchercc066712013-04-09 12:59:51 -04003780 * cik_gpu_check_soft_reset - check which blocks are busy
3781 *
3782 * @rdev: radeon_device pointer
3783 *
3784 * Check which blocks are busy and return the relevant reset
3785 * mask to be used by cik_gpu_soft_reset().
3786 * Returns a mask of the blocks to be reset.
3787 */
3788static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3789{
3790 u32 reset_mask = 0;
3791 u32 tmp;
3792
3793 /* GRBM_STATUS */
3794 tmp = RREG32(GRBM_STATUS);
3795 if (tmp & (PA_BUSY | SC_BUSY |
3796 BCI_BUSY | SX_BUSY |
3797 TA_BUSY | VGT_BUSY |
3798 DB_BUSY | CB_BUSY |
3799 GDS_BUSY | SPI_BUSY |
3800 IA_BUSY | IA_BUSY_NO_DMA))
3801 reset_mask |= RADEON_RESET_GFX;
3802
3803 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3804 reset_mask |= RADEON_RESET_CP;
3805
3806 /* GRBM_STATUS2 */
3807 tmp = RREG32(GRBM_STATUS2);
3808 if (tmp & RLC_BUSY)
3809 reset_mask |= RADEON_RESET_RLC;
3810
3811 /* SDMA0_STATUS_REG */
3812 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3813 if (!(tmp & SDMA_IDLE))
3814 reset_mask |= RADEON_RESET_DMA;
3815
3816 /* SDMA1_STATUS_REG */
3817 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3818 if (!(tmp & SDMA_IDLE))
3819 reset_mask |= RADEON_RESET_DMA1;
3820
3821 /* SRBM_STATUS2 */
3822 tmp = RREG32(SRBM_STATUS2);
3823 if (tmp & SDMA_BUSY)
3824 reset_mask |= RADEON_RESET_DMA;
3825
3826 if (tmp & SDMA1_BUSY)
3827 reset_mask |= RADEON_RESET_DMA1;
3828
3829 /* SRBM_STATUS */
3830 tmp = RREG32(SRBM_STATUS);
3831
3832 if (tmp & IH_BUSY)
3833 reset_mask |= RADEON_RESET_IH;
3834
3835 if (tmp & SEM_BUSY)
3836 reset_mask |= RADEON_RESET_SEM;
3837
3838 if (tmp & GRBM_RQ_PENDING)
3839 reset_mask |= RADEON_RESET_GRBM;
3840
3841 if (tmp & VMC_BUSY)
3842 reset_mask |= RADEON_RESET_VMC;
3843
3844 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3845 MCC_BUSY | MCD_BUSY))
3846 reset_mask |= RADEON_RESET_MC;
3847
3848 if (evergreen_is_display_hung(rdev))
3849 reset_mask |= RADEON_RESET_DISPLAY;
3850
3851 /* Skip MC reset as it's most likely not hung, just busy */
3852 if (reset_mask & RADEON_RESET_MC) {
3853 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3854 reset_mask &= ~RADEON_RESET_MC;
3855 }
3856
3857 return reset_mask;
3858}
3859
3860/**
3861 * cik_gpu_soft_reset - soft reset GPU
3862 *
3863 * @rdev: radeon_device pointer
3864 * @reset_mask: mask of which blocks to reset
3865 *
3866 * Soft reset the blocks specified in @reset_mask.
3867 */
3868static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3869{
3870 struct evergreen_mc_save save;
3871 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3872 u32 tmp;
3873
3874 if (reset_mask == 0)
3875 return;
3876
3877 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3878
3879 cik_print_gpu_status_regs(rdev);
3880 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3881 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3882 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3883 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3884
3885 /* stop the rlc */
3886 cik_rlc_stop(rdev);
3887
3888 /* Disable GFX parsing/prefetching */
3889 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3890
3891 /* Disable MEC parsing/prefetching */
3892 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3893
3894 if (reset_mask & RADEON_RESET_DMA) {
3895 /* sdma0 */
3896 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3897 tmp |= SDMA_HALT;
3898 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3899 }
3900 if (reset_mask & RADEON_RESET_DMA1) {
3901 /* sdma1 */
3902 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3903 tmp |= SDMA_HALT;
3904 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3905 }
3906
3907 evergreen_mc_stop(rdev, &save);
3908 if (evergreen_mc_wait_for_idle(rdev)) {
3909 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3910 }
3911
3912 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3913 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3914
3915 if (reset_mask & RADEON_RESET_CP) {
3916 grbm_soft_reset |= SOFT_RESET_CP;
3917
3918 srbm_soft_reset |= SOFT_RESET_GRBM;
3919 }
3920
3921 if (reset_mask & RADEON_RESET_DMA)
3922 srbm_soft_reset |= SOFT_RESET_SDMA;
3923
3924 if (reset_mask & RADEON_RESET_DMA1)
3925 srbm_soft_reset |= SOFT_RESET_SDMA1;
3926
3927 if (reset_mask & RADEON_RESET_DISPLAY)
3928 srbm_soft_reset |= SOFT_RESET_DC;
3929
3930 if (reset_mask & RADEON_RESET_RLC)
3931 grbm_soft_reset |= SOFT_RESET_RLC;
3932
3933 if (reset_mask & RADEON_RESET_SEM)
3934 srbm_soft_reset |= SOFT_RESET_SEM;
3935
3936 if (reset_mask & RADEON_RESET_IH)
3937 srbm_soft_reset |= SOFT_RESET_IH;
3938
3939 if (reset_mask & RADEON_RESET_GRBM)
3940 srbm_soft_reset |= SOFT_RESET_GRBM;
3941
3942 if (reset_mask & RADEON_RESET_VMC)
3943 srbm_soft_reset |= SOFT_RESET_VMC;
3944
3945 if (!(rdev->flags & RADEON_IS_IGP)) {
3946 if (reset_mask & RADEON_RESET_MC)
3947 srbm_soft_reset |= SOFT_RESET_MC;
3948 }
3949
3950 if (grbm_soft_reset) {
3951 tmp = RREG32(GRBM_SOFT_RESET);
3952 tmp |= grbm_soft_reset;
3953 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3954 WREG32(GRBM_SOFT_RESET, tmp);
3955 tmp = RREG32(GRBM_SOFT_RESET);
3956
3957 udelay(50);
3958
3959 tmp &= ~grbm_soft_reset;
3960 WREG32(GRBM_SOFT_RESET, tmp);
3961 tmp = RREG32(GRBM_SOFT_RESET);
3962 }
3963
3964 if (srbm_soft_reset) {
3965 tmp = RREG32(SRBM_SOFT_RESET);
3966 tmp |= srbm_soft_reset;
3967 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3968 WREG32(SRBM_SOFT_RESET, tmp);
3969 tmp = RREG32(SRBM_SOFT_RESET);
3970
3971 udelay(50);
3972
3973 tmp &= ~srbm_soft_reset;
3974 WREG32(SRBM_SOFT_RESET, tmp);
3975 tmp = RREG32(SRBM_SOFT_RESET);
3976 }
3977
3978 /* Wait a little for things to settle down */
3979 udelay(50);
3980
3981 evergreen_mc_resume(rdev, &save);
3982 udelay(50);
3983
3984 cik_print_gpu_status_regs(rdev);
3985}
3986
3987/**
3988 * cik_asic_reset - soft reset GPU
3989 *
3990 * @rdev: radeon_device pointer
3991 *
3992 * Look up which blocks are hung and attempt
3993 * to reset them.
3994 * Returns 0 for success.
3995 */
3996int cik_asic_reset(struct radeon_device *rdev)
3997{
3998 u32 reset_mask;
3999
4000 reset_mask = cik_gpu_check_soft_reset(rdev);
4001
4002 if (reset_mask)
4003 r600_set_bios_scratch_engine_hung(rdev, true);
4004
4005 cik_gpu_soft_reset(rdev, reset_mask);
4006
4007 reset_mask = cik_gpu_check_soft_reset(rdev);
4008
4009 if (!reset_mask)
4010 r600_set_bios_scratch_engine_hung(rdev, false);
4011
4012 return 0;
4013}
4014
4015/**
4016 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04004017 *
4018 * @rdev: radeon_device pointer
4019 * @ring: radeon_ring structure holding ring information
4020 *
4021 * Check if the 3D engine is locked up (CIK).
4022 * Returns true if the engine is locked, false if not.
4023 */
Alex Deuchercc066712013-04-09 12:59:51 -04004024bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04004025{
Alex Deuchercc066712013-04-09 12:59:51 -04004026 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04004027
Alex Deuchercc066712013-04-09 12:59:51 -04004028 if (!(reset_mask & (RADEON_RESET_GFX |
4029 RADEON_RESET_COMPUTE |
4030 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04004031 radeon_ring_lockup_update(ring);
4032 return false;
4033 }
4034 /* force CP activities */
4035 radeon_ring_force_activity(rdev, ring);
4036 return radeon_ring_test_lockup(rdev, ring);
4037}
4038
4039/**
Alex Deucher21a93e12013-04-09 12:47:11 -04004040 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4041 *
4042 * @rdev: radeon_device pointer
4043 * @ring: radeon_ring structure holding ring information
4044 *
4045 * Check if the async DMA engine is locked up (CIK).
4046 * Returns true if the engine appears to be locked up, false if not.
4047 */
4048bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4049{
Alex Deuchercc066712013-04-09 12:59:51 -04004050 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4051 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04004052
4053 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04004054 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04004055 else
Alex Deuchercc066712013-04-09 12:59:51 -04004056 mask = RADEON_RESET_DMA1;
4057
4058 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04004059 radeon_ring_lockup_update(ring);
4060 return false;
4061 }
4062 /* force ring activities */
4063 radeon_ring_force_activity(rdev, ring);
4064 return radeon_ring_test_lockup(rdev, ring);
4065}
4066
Alex Deucher1c491652013-04-09 12:45:26 -04004067/* MC */
4068/**
4069 * cik_mc_program - program the GPU memory controller
4070 *
4071 * @rdev: radeon_device pointer
4072 *
4073 * Set the location of vram, gart, and AGP in the GPU's
4074 * physical address space (CIK).
4075 */
4076static void cik_mc_program(struct radeon_device *rdev)
4077{
4078 struct evergreen_mc_save save;
4079 u32 tmp;
4080 int i, j;
4081
4082 /* Initialize HDP */
4083 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4084 WREG32((0x2c14 + j), 0x00000000);
4085 WREG32((0x2c18 + j), 0x00000000);
4086 WREG32((0x2c1c + j), 0x00000000);
4087 WREG32((0x2c20 + j), 0x00000000);
4088 WREG32((0x2c24 + j), 0x00000000);
4089 }
4090 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4091
4092 evergreen_mc_stop(rdev, &save);
4093 if (radeon_mc_wait_for_idle(rdev)) {
4094 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4095 }
4096 /* Lockout access through VGA aperture*/
4097 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4098 /* Update configuration */
4099 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4100 rdev->mc.vram_start >> 12);
4101 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4102 rdev->mc.vram_end >> 12);
4103 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4104 rdev->vram_scratch.gpu_addr >> 12);
4105 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4106 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4107 WREG32(MC_VM_FB_LOCATION, tmp);
4108 /* XXX double check these! */
4109 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4110 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4111 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4112 WREG32(MC_VM_AGP_BASE, 0);
4113 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4114 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4115 if (radeon_mc_wait_for_idle(rdev)) {
4116 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4117 }
4118 evergreen_mc_resume(rdev, &save);
4119 /* we need to own VRAM, so turn off the VGA renderer here
4120 * to stop it overwriting our objects */
4121 rv515_vga_render_disable(rdev);
4122}
4123
4124/**
4125 * cik_mc_init - initialize the memory controller driver params
4126 *
4127 * @rdev: radeon_device pointer
4128 *
4129 * Look up the amount of vram, vram width, and decide how to place
4130 * vram and gart within the GPU's physical address space (CIK).
4131 * Returns 0 for success.
4132 */
4133static int cik_mc_init(struct radeon_device *rdev)
4134{
4135 u32 tmp;
4136 int chansize, numchan;
4137
4138 /* Get VRAM information */
4139 rdev->mc.vram_is_ddr = true;
4140 tmp = RREG32(MC_ARB_RAMCFG);
4141 if (tmp & CHANSIZE_MASK) {
4142 chansize = 64;
4143 } else {
4144 chansize = 32;
4145 }
4146 tmp = RREG32(MC_SHARED_CHMAP);
4147 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4148 case 0:
4149 default:
4150 numchan = 1;
4151 break;
4152 case 1:
4153 numchan = 2;
4154 break;
4155 case 2:
4156 numchan = 4;
4157 break;
4158 case 3:
4159 numchan = 8;
4160 break;
4161 case 4:
4162 numchan = 3;
4163 break;
4164 case 5:
4165 numchan = 6;
4166 break;
4167 case 6:
4168 numchan = 10;
4169 break;
4170 case 7:
4171 numchan = 12;
4172 break;
4173 case 8:
4174 numchan = 16;
4175 break;
4176 }
4177 rdev->mc.vram_width = numchan * chansize;
4178 /* Could aper size report 0 ? */
4179 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4180 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4181 /* CONFIG_MEMSIZE reports the size in MB, as on SI */
4182 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4183 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4184 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4185 si_vram_gtt_location(rdev, &rdev->mc);
4186 radeon_update_bandwidth_info(rdev);
4187
4188 return 0;
4189}
4190
4191/*
4192 * GART
4193 * VMID 0 is the physical GPU addresses as used by the kernel.
4194 * VMIDs 1-15 are used for userspace clients and are handled
4195 * by the radeon vm/hsa code.
4196 */
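/*
 * Illustrative sketch only (not part of the upstream driver): the
 * per-VMID page table base registers are split across two banks, which
 * is why the setup and flush routines further down select the register
 * like this:
 */
static inline u32 cik_vmid_pt_base_reg(unsigned int vmid)
{
	/* VMIDs 0-7 and 8-15 live in separate register banks */
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	else
		return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}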
4197/**
4198 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4199 *
4200 * @rdev: radeon_device pointer
4201 *
4202 * Flush the TLB for the VMID 0 page table (CIK).
4203 */
4204void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4205{
4206 /* flush hdp cache */
4207 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4208
4209 /* bits 0-15 are the VM contexts0-15 */
4210 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4211}
4212
4213/**
4214 * cik_pcie_gart_enable - gart enable
4215 *
4216 * @rdev: radeon_device pointer
4217 *
4218 * This sets up the TLBs, programs the page tables for VMID0,
4219 * sets up the hw for VMIDs 1-15 which are allocated on
4220 * demand, and sets up the global locations for the LDS, GDS,
4221 * and GPUVM for FSA64 clients (CIK).
4222 * Returns 0 for success, errors for failure.
4223 */
4224static int cik_pcie_gart_enable(struct radeon_device *rdev)
4225{
4226 int r, i;
4227
4228 if (rdev->gart.robj == NULL) {
4229 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4230 return -EINVAL;
4231 }
4232 r = radeon_gart_table_vram_pin(rdev);
4233 if (r)
4234 return r;
4235 radeon_gart_restore(rdev);
4236 /* Setup TLB control */
4237 WREG32(MC_VM_MX_L1_TLB_CNTL,
4238 (0xA << 7) |
4239 ENABLE_L1_TLB |
4240 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4241 ENABLE_ADVANCED_DRIVER_MODEL |
4242 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4243 /* Setup L2 cache */
4244 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4245 ENABLE_L2_FRAGMENT_PROCESSING |
4246 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4247 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4248 EFFECTIVE_L2_QUEUE_SIZE(7) |
4249 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4250 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4251 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4252 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4253 /* setup context0 */
4254 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4255 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4256 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4257 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4258 (u32)(rdev->dummy_page.addr >> 12));
4259 WREG32(VM_CONTEXT0_CNTL2, 0);
4260 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4261 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4262
4263 WREG32(0x15D4, 0);
4264 WREG32(0x15D8, 0);
4265 WREG32(0x15DC, 0);
4266
4267 /* empty context1-15 */
4268 /* FIXME start with 4G, once using 2 level pt switch to full
4269 * vm size space
4270 */
4271 /* set vm size, must be a multiple of 4 */
4272 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4273 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4274 for (i = 1; i < 16; i++) {
4275 if (i < 8)
4276 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4277 rdev->gart.table_addr >> 12);
4278 else
4279 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4280 rdev->gart.table_addr >> 12);
4281 }
4282
4283 /* enable context1-15 */
4284 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4285 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04004286 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04004287 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04004288 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4289 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4290 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4291 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4292 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4293 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4294 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4295 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4296 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4297 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4298 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4299 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04004300
4301 /* TC cache setup ??? */
4302 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4303 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4304 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4305
4306 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4307 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4308 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4309 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4310 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4311
4312 WREG32(TC_CFG_L1_VOLATILE, 0);
4313 WREG32(TC_CFG_L2_VOLATILE, 0);
4314
4315 if (rdev->family == CHIP_KAVERI) {
4316 u32 tmp = RREG32(CHUB_CONTROL);
4317 tmp &= ~BYPASS_VM;
4318 WREG32(CHUB_CONTROL, tmp);
4319 }
4320
4321 /* XXX SH_MEM regs */
4322 /* where to put LDS, scratch, GPUVM in FSA64 space */
4323 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05004324 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04004325 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04004326 WREG32(SH_MEM_CONFIG, 0);
4327 WREG32(SH_MEM_APE1_BASE, 1);
4328 WREG32(SH_MEM_APE1_LIMIT, 0);
4329 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04004330 /* SDMA GFX */
4331 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4332 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4333 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4334 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4335 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04004336 }
Alex Deucherb556b122013-01-29 10:44:22 -05004337 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucher1c491652013-04-09 12:45:26 -04004338
4339 cik_pcie_gart_tlb_flush(rdev);
4340 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4341 (unsigned)(rdev->mc.gtt_size >> 20),
4342 (unsigned long long)rdev->gart.table_addr);
4343 rdev->gart.ready = true;
4344 return 0;
4345}
4346
4347/**
4348 * cik_pcie_gart_disable - gart disable
4349 *
4350 * @rdev: radeon_device pointer
4351 *
4352 * This disables all VM page tables (CIK).
4353 */
4354static void cik_pcie_gart_disable(struct radeon_device *rdev)
4355{
4356 /* Disable all tables */
4357 WREG32(VM_CONTEXT0_CNTL, 0);
4358 WREG32(VM_CONTEXT1_CNTL, 0);
4359 /* Setup TLB control */
4360 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4361 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4362 /* Setup L2 cache */
4363 WREG32(VM_L2_CNTL,
4364 ENABLE_L2_FRAGMENT_PROCESSING |
4365 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4366 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4367 EFFECTIVE_L2_QUEUE_SIZE(7) |
4368 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4369 WREG32(VM_L2_CNTL2, 0);
4370 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4371 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4372 radeon_gart_table_vram_unpin(rdev);
4373}
4374
4375/**
4376 * cik_pcie_gart_fini - vm fini callback
4377 *
4378 * @rdev: radeon_device pointer
4379 *
4380 * Tears down the driver GART/VM setup (CIK).
4381 */
4382static void cik_pcie_gart_fini(struct radeon_device *rdev)
4383{
4384 cik_pcie_gart_disable(rdev);
4385 radeon_gart_table_vram_free(rdev);
4386 radeon_gart_fini(rdev);
4387}
4388
4389/* vm parser */
4390/**
4391 * cik_ib_parse - vm ib_parse callback
4392 *
4393 * @rdev: radeon_device pointer
4394 * @ib: indirect buffer pointer
4395 *
4396 * CIK uses hw IB checking so this is a nop (CIK).
4397 */
4398int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4399{
4400 return 0;
4401}
4402
4403/*
4404 * vm
4405 * VMID 0 is the physical GPU addresses as used by the kernel.
4406 * VMIDs 1-15 are used for userspace clients and are handled
4407 * by the radeon vm/hsa code.
4408 */
4409/**
4410 * cik_vm_init - cik vm init callback
4411 *
4412 * @rdev: radeon_device pointer
4413 *
4414 * Inits cik specific vm parameters (number of VMs, base of vram for
4415 * VMIDs 1-15) (CIK).
4416 * Returns 0 for success.
4417 */
4418int cik_vm_init(struct radeon_device *rdev)
4419{
4420 /* number of VMs */
4421 rdev->vm_manager.nvm = 16;
4422 /* base offset of vram pages */
4423 if (rdev->flags & RADEON_IS_IGP) {
4424 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4425 tmp <<= 22;
4426 rdev->vm_manager.vram_base_offset = tmp;
4427 } else
4428 rdev->vm_manager.vram_base_offset = 0;
4429
4430 return 0;
4431}
4432
4433/**
4434 * cik_vm_fini - cik vm fini callback
4435 *
4436 * @rdev: radeon_device pointer
4437 *
4438 * Tear down any asic specific VM setup (CIK).
4439 */
4440void cik_vm_fini(struct radeon_device *rdev)
4441{
4442}
4443
Alex Deucherf96ab482012-08-31 10:37:47 -04004444/**
4445 * cik_vm_flush - cik vm flush using the CP
4446 *
4447 * @rdev: radeon_device pointer
4448 *
4449 * Update the page table base and flush the VM TLB
4450 * using the CP (CIK).
4451 */
4452void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4453{
4454 struct radeon_ring *ring = &rdev->ring[ridx];
4455
4456 if (vm == NULL)
4457 return;
4458
4459 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4460 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4461 WRITE_DATA_DST_SEL(0)));
4462 if (vm->id < 8) {
4463 radeon_ring_write(ring,
4464 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4465 } else {
4466 radeon_ring_write(ring,
4467 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4468 }
4469 radeon_ring_write(ring, 0);
4470 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4471
4472 /* update SH_MEM_* regs */
4473 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4474 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4475 WRITE_DATA_DST_SEL(0)));
4476 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4477 radeon_ring_write(ring, 0);
4478 radeon_ring_write(ring, VMID(vm->id));
4479
4480 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4481 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4482 WRITE_DATA_DST_SEL(0)));
4483 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4484 radeon_ring_write(ring, 0);
4485
4486 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4487 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4488 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4489 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4490
4491 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4492 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4493 WRITE_DATA_DST_SEL(0)));
4494 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4495 radeon_ring_write(ring, 0);
4496 radeon_ring_write(ring, VMID(0));
4497
4498 /* HDP flush */
4499 /* We should be using the WAIT_REG_MEM packet here like in
4500 * cik_fence_ring_emit(), but it causes the CP to hang in this
4501 * context...
4502 */
4503 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4504 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4505 WRITE_DATA_DST_SEL(0)));
4506 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4507 radeon_ring_write(ring, 0);
4508 radeon_ring_write(ring, 0);
4509
4510 /* bits 0-15 are the VM contexts0-15 */
4511 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4512 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4513 WRITE_DATA_DST_SEL(0)));
4514 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4515 radeon_ring_write(ring, 0);
4516 radeon_ring_write(ring, 1 << vm->id);
4517
Alex Deucherb07fdd32013-04-11 09:36:17 -04004518 /* compute doesn't have PFP */
4519 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4520 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4521 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4522 radeon_ring_write(ring, 0x0);
4523 }
Alex Deucherf96ab482012-08-31 10:37:47 -04004524}
4525
Alex Deucher605de6b2012-10-22 13:04:03 -04004526/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04004527 * cik_vm_set_page - update the page tables using CP or sDMA
4528 *
4529 * @rdev: radeon_device pointer
4530 * @ib: indirect buffer to fill with commands
4531 * @pe: addr of the page entry
4532 * @addr: dst addr to write into pe
4533 * @count: number of page entries to update
4534 * @incr: increase next addr by incr bytes
4535 * @flags: access flags
4536 *
4537 * Update the page tables using CP or sDMA (CIK).
4538 */
4539void cik_vm_set_page(struct radeon_device *rdev,
4540 struct radeon_ib *ib,
4541 uint64_t pe,
4542 uint64_t addr, unsigned count,
4543 uint32_t incr, uint32_t flags)
4544{
4545 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4546 uint64_t value;
4547 unsigned ndw;
4548
4549 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4550 /* CP */
4551 while (count) {
4552 ndw = 2 + count * 2;
4553 if (ndw > 0x3FFE)
4554 ndw = 0x3FFE;
4555
4556 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4557 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4558 WRITE_DATA_DST_SEL(1));
4559 ib->ptr[ib->length_dw++] = pe;
4560 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4561 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4562 if (flags & RADEON_VM_PAGE_SYSTEM) {
4563 value = radeon_vm_map_gart(rdev, addr);
4564 value &= 0xFFFFFFFFFFFFF000ULL;
4565 } else if (flags & RADEON_VM_PAGE_VALID) {
4566 value = addr;
4567 } else {
4568 value = 0;
4569 }
4570 addr += incr;
4571 value |= r600_flags;
4572 ib->ptr[ib->length_dw++] = value;
4573 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4574 }
4575 }
4576 } else {
4577 /* DMA */
4578 if (flags & RADEON_VM_PAGE_SYSTEM) {
4579 while (count) {
4580 ndw = count * 2;
4581 if (ndw > 0xFFFFE)
4582 ndw = 0xFFFFE;
4583
4584 /* for non-physically contiguous pages (system) */
4585 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4586 ib->ptr[ib->length_dw++] = pe;
4587 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4588 ib->ptr[ib->length_dw++] = ndw;
4589 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4590 if (flags & RADEON_VM_PAGE_SYSTEM) {
4591 value = radeon_vm_map_gart(rdev, addr);
4592 value &= 0xFFFFFFFFFFFFF000ULL;
4593 } else if (flags & RADEON_VM_PAGE_VALID) {
4594 value = addr;
4595 } else {
4596 value = 0;
4597 }
4598 addr += incr;
4599 value |= r600_flags;
4600 ib->ptr[ib->length_dw++] = value;
4601 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4602 }
4603 }
4604 } else {
4605 while (count) {
4606 ndw = count;
4607 if (ndw > 0x7FFFF)
4608 ndw = 0x7FFFF;
4609
4610 if (flags & RADEON_VM_PAGE_VALID)
4611 value = addr;
4612 else
4613 value = 0;
4614 /* for physically contiguous pages (vram) */
4615 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4616 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4617 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4618 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4619 ib->ptr[ib->length_dw++] = 0;
4620 ib->ptr[ib->length_dw++] = value; /* value */
4621 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4622 ib->ptr[ib->length_dw++] = incr; /* increment size */
4623 ib->ptr[ib->length_dw++] = 0;
4624 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4625 pe += ndw * 8;
4626 addr += ndw * incr;
4627 count -= ndw;
4628 }
4629 }
4630 while (ib->length_dw & 0x7)
4631 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4632 }
4633}
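/*
 * Worked example for the sDMA path above (editorial note, not driver
 * code): mapping 2 MiB of physically contiguous vram with 4 KiB pages
 * gives count = 512, far below the 0x7FFFF per-packet limit, so one
 * 10 dword GENERATE_PTE_PDE packet covers the whole range.  The same
 * 512 pages through the system (gart) path would instead emit
 * 512 * 2 = 1024 data dwords in a single linear WRITE packet
 * (limit 0xFFFFE dwords).  In both cases the IB is then NOP-padded to
 * a multiple of 8 dwords.
 */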
4634
4635/**
Alex Deucher605de6b2012-10-22 13:04:03 -04004636 * cik_dma_vm_flush - cik vm flush using sDMA
4637 *
4638 * @rdev: radeon_device pointer
4639 *
4640 * Update the page table base and flush the VM TLB
4641 * using sDMA (CIK).
4642 */
4643void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4644{
4645 struct radeon_ring *ring = &rdev->ring[ridx];
4646 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4647 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4648 u32 ref_and_mask;
4649
4650 if (vm == NULL)
4651 return;
4652
4653 if (ridx == R600_RING_TYPE_DMA_INDEX)
4654 ref_and_mask = SDMA0;
4655 else
4656 ref_and_mask = SDMA1;
4657
4658 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4659 if (vm->id < 8) {
4660 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4661 } else {
4662 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4663 }
4664 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4665
4666 /* update SH_MEM_* regs */
4667 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4668 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4669 radeon_ring_write(ring, VMID(vm->id));
4670
4671 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4672 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4673 radeon_ring_write(ring, 0);
4674
4675 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4676 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4677 radeon_ring_write(ring, 0);
4678
4679 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4680 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4681 radeon_ring_write(ring, 1);
4682
4683 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4684 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4685 radeon_ring_write(ring, 0);
4686
4687 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4688 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4689 radeon_ring_write(ring, VMID(0));
4690
4691 /* flush HDP */
4692 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4693 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4694 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4695 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4696 radeon_ring_write(ring, ref_and_mask); /* MASK */
4697 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4698
4699 /* flush TLB */
4700 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4701 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4702 radeon_ring_write(ring, 1 << vm->id);
4703}
4704
Alex Deucherf6796ca2012-11-09 10:44:08 -05004705/*
4706 * RLC
4707 * The RLC is a multi-purpose microengine that handles a
4708 * variety of functions, the most important of which is
4709 * the interrupt controller.
4710 */
4711/**
4712 * cik_rlc_stop - stop the RLC ME
4713 *
4714 * @rdev: radeon_device pointer
4715 *
4716 * Halt the RLC ME (MicroEngine) (CIK).
4717 */
4718static void cik_rlc_stop(struct radeon_device *rdev)
4719{
4720 int i, j, k;
4721 u32 mask, tmp;
4722
4723 tmp = RREG32(CP_INT_CNTL_RING0);
4724 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4725 WREG32(CP_INT_CNTL_RING0, tmp);
4726
4727 RREG32(CB_CGTT_SCLK_CTRL);
4728 RREG32(CB_CGTT_SCLK_CTRL);
4729 RREG32(CB_CGTT_SCLK_CTRL);
4730 RREG32(CB_CGTT_SCLK_CTRL);
4731
4732 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4733 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4734
4735 WREG32(RLC_CNTL, 0);
4736
4737 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4738 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4739 cik_select_se_sh(rdev, i, j);
4740 for (k = 0; k < rdev->usec_timeout; k++) {
4741 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4742 break;
4743 udelay(1);
4744 }
4745 }
4746 }
4747 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4748
4749 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4750 for (k = 0; k < rdev->usec_timeout; k++) {
4751 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4752 break;
4753 udelay(1);
4754 }
4755}
4756
4757/**
4758 * cik_rlc_start - start the RLC ME
4759 *
4760 * @rdev: radeon_device pointer
4761 *
4762 * Unhalt the RLC ME (MicroEngine) (CIK).
4763 */
4764static void cik_rlc_start(struct radeon_device *rdev)
4765{
4766 u32 tmp;
4767
4768 WREG32(RLC_CNTL, RLC_ENABLE);
4769
4770 tmp = RREG32(CP_INT_CNTL_RING0);
4771 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4772 WREG32(CP_INT_CNTL_RING0, tmp);
4773
4774 udelay(50);
4775}
4776
4777/**
4778 * cik_rlc_resume - setup the RLC hw
4779 *
4780 * @rdev: radeon_device pointer
4781 *
4782 * Initialize the RLC registers, load the ucode,
4783 * and start the RLC (CIK).
4784 * Returns 0 for success, -EINVAL if the ucode is not available.
4785 */
4786static int cik_rlc_resume(struct radeon_device *rdev)
4787{
4788 u32 i, size;
4789 u32 clear_state_info[3];
4790 const __be32 *fw_data;
4791
4792 if (!rdev->rlc_fw)
4793 return -EINVAL;
4794
4795 switch (rdev->family) {
4796 case CHIP_BONAIRE:
4797 default:
4798 size = BONAIRE_RLC_UCODE_SIZE;
4799 break;
4800 case CHIP_KAVERI:
4801 size = KV_RLC_UCODE_SIZE;
4802 break;
4803 case CHIP_KABINI:
4804 size = KB_RLC_UCODE_SIZE;
4805 break;
4806 }
4807
4808 cik_rlc_stop(rdev);
4809
4810 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4811 RREG32(GRBM_SOFT_RESET);
4812 udelay(50);
4813 WREG32(GRBM_SOFT_RESET, 0);
4814 RREG32(GRBM_SOFT_RESET);
4815 udelay(50);
4816
4817 WREG32(RLC_LB_CNTR_INIT, 0);
4818 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4819
4820 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4821 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4822 WREG32(RLC_LB_PARAMS, 0x00600408);
4823 WREG32(RLC_LB_CNTL, 0x80000004);
4824
4825 WREG32(RLC_MC_CNTL, 0);
4826 WREG32(RLC_UCODE_CNTL, 0);
4827
4828 fw_data = (const __be32 *)rdev->rlc_fw->data;
4829 WREG32(RLC_GPM_UCODE_ADDR, 0);
4830 for (i = 0; i < size; i++)
4831 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4832 WREG32(RLC_GPM_UCODE_ADDR, 0);
4833
4834 /* XXX */
4835 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4836 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4837 clear_state_info[2] = 0;//cik_default_size;
4838 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4839 for (i = 0; i < 3; i++)
4840 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4841 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4842
4843 cik_rlc_start(rdev);
4844
4845 return 0;
4846}
Alex Deuchera59781b2012-11-09 10:45:57 -05004847
4848/*
4849 * Interrupts
4850 * Starting with r6xx, interrupts are handled via a ring buffer.
4851 * Ring buffers are areas of GPU accessible memory that the GPU
4852 * writes interrupt vectors into and the host reads vectors out of.
4853 * There is a rptr (read pointer) that determines where the
4854 * host is currently reading, and a wptr (write pointer)
4855 * which determines where the GPU has written. When the
4856 * pointers are equal, the ring is idle. When the GPU
4857 * writes vectors to the ring buffer, it increments the
4858 * wptr. When there is an interrupt, the host then starts
4859 * fetching commands and processing them until the pointers are
4860 * equal again at which point it updates the rptr.
4861 */
4862
4863/**
4864 * cik_enable_interrupts - Enable the interrupt ring buffer
4865 *
4866 * @rdev: radeon_device pointer
4867 *
4868 * Enable the interrupt ring buffer (CIK).
4869 */
4870static void cik_enable_interrupts(struct radeon_device *rdev)
4871{
4872 u32 ih_cntl = RREG32(IH_CNTL);
4873 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4874
4875 ih_cntl |= ENABLE_INTR;
4876 ih_rb_cntl |= IH_RB_ENABLE;
4877 WREG32(IH_CNTL, ih_cntl);
4878 WREG32(IH_RB_CNTL, ih_rb_cntl);
4879 rdev->ih.enabled = true;
4880}
4881
4882/**
4883 * cik_disable_interrupts - Disable the interrupt ring buffer
4884 *
4885 * @rdev: radeon_device pointer
4886 *
4887 * Disable the interrupt ring buffer (CIK).
4888 */
4889static void cik_disable_interrupts(struct radeon_device *rdev)
4890{
4891 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4892 u32 ih_cntl = RREG32(IH_CNTL);
4893
4894 ih_rb_cntl &= ~IH_RB_ENABLE;
4895 ih_cntl &= ~ENABLE_INTR;
4896 WREG32(IH_RB_CNTL, ih_rb_cntl);
4897 WREG32(IH_CNTL, ih_cntl);
4898 /* set rptr, wptr to 0 */
4899 WREG32(IH_RB_RPTR, 0);
4900 WREG32(IH_RB_WPTR, 0);
4901 rdev->ih.enabled = false;
4902 rdev->ih.rptr = 0;
4903}
4904
4905/**
4906 * cik_disable_interrupt_state - Disable all interrupt sources
4907 *
4908 * @rdev: radeon_device pointer
4909 *
4910 * Clear all interrupt enable bits used by the driver (CIK).
4911 */
4912static void cik_disable_interrupt_state(struct radeon_device *rdev)
4913{
4914 u32 tmp;
4915
4916 /* gfx ring */
4917 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04004918 /* sdma */
4919 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4920 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4921 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4922 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05004923 /* compute queues */
4924 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4925 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4926 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4927 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4928 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4929 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4930 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4931 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4932 /* grbm */
4933 WREG32(GRBM_INT_CNTL, 0);
4934 /* vline/vblank, etc. */
4935 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4936 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4937 if (rdev->num_crtc >= 4) {
4938 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4939 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4940 }
4941 if (rdev->num_crtc >= 6) {
4942 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4943 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4944 }
4945
4946 /* dac hotplug */
4947 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4948
4949 /* digital hotplug */
4950 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4951 WREG32(DC_HPD1_INT_CONTROL, tmp);
4952 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4953 WREG32(DC_HPD2_INT_CONTROL, tmp);
4954 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4955 WREG32(DC_HPD3_INT_CONTROL, tmp);
4956 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4957 WREG32(DC_HPD4_INT_CONTROL, tmp);
4958 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4959 WREG32(DC_HPD5_INT_CONTROL, tmp);
4960 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4961 WREG32(DC_HPD6_INT_CONTROL, tmp);
4962
4963}
4964
4965/**
4966 * cik_irq_init - init and enable the interrupt ring
4967 *
4968 * @rdev: radeon_device pointer
4969 *
4970 * Allocate a ring buffer for the interrupt controller,
4971 * enable the RLC, disable interrupts, set up the IH
4972 * ring buffer, and enable it (CIK).
4973 * Called at device load and resume.
4974 * Returns 0 for success, errors for failure.
4975 */
4976static int cik_irq_init(struct radeon_device *rdev)
4977{
4978 int ret = 0;
4979 int rb_bufsz;
4980 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4981
4982 /* allocate ring */
4983 ret = r600_ih_ring_alloc(rdev);
4984 if (ret)
4985 return ret;
4986
4987 /* disable irqs */
4988 cik_disable_interrupts(rdev);
4989
4990 /* init rlc */
4991 ret = cik_rlc_resume(rdev);
4992 if (ret) {
4993 r600_ih_ring_fini(rdev);
4994 return ret;
4995 }
4996
4997 /* setup interrupt control */
4998 /* XXX this should actually be a bus address, not an MC address. same on older asics */
4999 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5000 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5001 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5002 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5003 */
5004 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5005 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5006 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5007 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5008
5009 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5010 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5011
5012 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5013 IH_WPTR_OVERFLOW_CLEAR |
5014 (rb_bufsz << 1));
5015
5016 if (rdev->wb.enabled)
5017 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5018
5019 /* set the writeback address whether it's enabled or not */
5020 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5021 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5022
5023 WREG32(IH_RB_CNTL, ih_rb_cntl);
5024
5025 /* set rptr, wptr to 0 */
5026 WREG32(IH_RB_RPTR, 0);
5027 WREG32(IH_RB_WPTR, 0);
5028
5029 /* Default settings for IH_CNTL (disabled at first) */
5030 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5031 /* RPTR_REARM only works if msi's are enabled */
5032 if (rdev->msi_enabled)
5033 ih_cntl |= RPTR_REARM;
5034 WREG32(IH_CNTL, ih_cntl);
5035
5036 /* force the active interrupt state to all disabled */
5037 cik_disable_interrupt_state(rdev);
5038
5039 pci_set_master(rdev->pdev);
5040
5041 /* enable irqs */
5042 cik_enable_interrupts(rdev);
5043
5044 return ret;
5045}
5046
5047/**
5048 * cik_irq_set - enable/disable interrupt sources
5049 *
5050 * @rdev: radeon_device pointer
5051 *
5052 * Enable interrupt sources on the GPU (vblanks, hpd,
5053 * etc.) (CIK).
5054 * Returns 0 for success, errors for failure.
5055 */
5056int cik_irq_set(struct radeon_device *rdev)
5057{
5058 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5059 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04005060 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5061 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05005062 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5063 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5064 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04005065 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05005066
5067 if (!rdev->irq.installed) {
5068 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5069 return -EINVAL;
5070 }
5071 /* don't enable anything if the ih is disabled */
5072 if (!rdev->ih.enabled) {
5073 cik_disable_interrupts(rdev);
5074 /* force the active interrupt state to all disabled */
5075 cik_disable_interrupt_state(rdev);
5076 return 0;
5077 }
5078
5079 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5080 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5081 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5082 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5083 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5084 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5085
Alex Deucher21a93e12013-04-09 12:47:11 -04005086 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5087 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5088
Alex Deucher2b0781a2013-04-09 14:26:16 -04005089 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5090 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5091 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5092 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5093 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5094 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5095 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5096 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
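/* the ME1/ME2 pipe interrupt controls above are read with the timestamp interrupt masked off; it is re-enabled per active compute ring below */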
5097
Alex Deuchera59781b2012-11-09 10:45:57 -05005098 /* enable CP interrupts on all rings */
5099 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5100 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5101 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5102 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04005103 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5104 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5105 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5106 if (ring->me == 1) {
5107 switch (ring->pipe) {
5108 case 0:
5109 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5110 break;
5111 case 1:
5112 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5113 break;
5114 case 2:
5115 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5116 break;
5117 case 3:
5118 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5119 break;
5120 default:
5121 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5122 break;
5123 }
5124 } else if (ring->me == 2) {
5125 switch (ring->pipe) {
5126 case 0:
5127 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5128 break;
5129 case 1:
5130 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5131 break;
5132 case 2:
5133 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5134 break;
5135 case 3:
5136 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5137 break;
5138 default:
5139 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5140 break;
5141 }
5142 } else {
5143 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5144 }
5145 }
5146 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5147 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5148 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5149 if (ring->me == 1) {
5150 switch (ring->pipe) {
5151 case 0:
5152 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5153 break;
5154 case 1:
5155 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5156 break;
5157 case 2:
5158 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5159 break;
5160 case 3:
5161 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5162 break;
5163 default:
5164 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5165 break;
5166 }
5167 } else if (ring->me == 2) {
5168 switch (ring->pipe) {
5169 case 0:
5170 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5171 break;
5172 case 1:
5173 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5174 break;
5175 case 2:
5176 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5177 break;
5178 case 3:
5179 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5180 break;
5181 default:
5182 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5183 break;
5184 }
5185 } else {
5186 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5187 }
5188 }
Alex Deuchera59781b2012-11-09 10:45:57 -05005189
Alex Deucher21a93e12013-04-09 12:47:11 -04005190 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5191 DRM_DEBUG("cik_irq_set: sw int dma\n");
5192 dma_cntl |= TRAP_ENABLE;
5193 }
5194
5195 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5196 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5197 dma_cntl1 |= TRAP_ENABLE;
5198 }
5199
Alex Deuchera59781b2012-11-09 10:45:57 -05005200 if (rdev->irq.crtc_vblank_int[0] ||
5201 atomic_read(&rdev->irq.pflip[0])) {
5202 DRM_DEBUG("cik_irq_set: vblank 0\n");
5203 crtc1 |= VBLANK_INTERRUPT_MASK;
5204 }
5205 if (rdev->irq.crtc_vblank_int[1] ||
5206 atomic_read(&rdev->irq.pflip[1])) {
5207 DRM_DEBUG("cik_irq_set: vblank 1\n");
5208 crtc2 |= VBLANK_INTERRUPT_MASK;
5209 }
5210 if (rdev->irq.crtc_vblank_int[2] ||
5211 atomic_read(&rdev->irq.pflip[2])) {
5212 DRM_DEBUG("cik_irq_set: vblank 2\n");
5213 crtc3 |= VBLANK_INTERRUPT_MASK;
5214 }
5215 if (rdev->irq.crtc_vblank_int[3] ||
5216 atomic_read(&rdev->irq.pflip[3])) {
5217 DRM_DEBUG("cik_irq_set: vblank 3\n");
5218 crtc4 |= VBLANK_INTERRUPT_MASK;
5219 }
5220 if (rdev->irq.crtc_vblank_int[4] ||
5221 atomic_read(&rdev->irq.pflip[4])) {
5222 DRM_DEBUG("cik_irq_set: vblank 4\n");
5223 crtc5 |= VBLANK_INTERRUPT_MASK;
5224 }
5225 if (rdev->irq.crtc_vblank_int[5] ||
5226 atomic_read(&rdev->irq.pflip[5])) {
5227 DRM_DEBUG("cik_irq_set: vblank 5\n");
5228 crtc6 |= VBLANK_INTERRUPT_MASK;
5229 }
5230 if (rdev->irq.hpd[0]) {
5231 DRM_DEBUG("cik_irq_set: hpd 1\n");
5232 hpd1 |= DC_HPDx_INT_EN;
5233 }
5234 if (rdev->irq.hpd[1]) {
5235 DRM_DEBUG("cik_irq_set: hpd 2\n");
5236 hpd2 |= DC_HPDx_INT_EN;
5237 }
5238 if (rdev->irq.hpd[2]) {
5239 DRM_DEBUG("cik_irq_set: hpd 3\n");
5240 hpd3 |= DC_HPDx_INT_EN;
5241 }
5242 if (rdev->irq.hpd[3]) {
5243 DRM_DEBUG("cik_irq_set: hpd 4\n");
5244 hpd4 |= DC_HPDx_INT_EN;
5245 }
5246 if (rdev->irq.hpd[4]) {
5247 DRM_DEBUG("cik_irq_set: hpd 5\n");
5248 hpd5 |= DC_HPDx_INT_EN;
5249 }
5250 if (rdev->irq.hpd[5]) {
5251 DRM_DEBUG("cik_irq_set: hpd 6\n");
5252 hpd6 |= DC_HPDx_INT_EN;
5253 }
5254
5255 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5256
Alex Deucher21a93e12013-04-09 12:47:11 -04005257 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5258 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5259
Alex Deucher2b0781a2013-04-09 14:26:16 -04005260 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5261 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5262 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5263 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5264 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5265 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5266 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5267 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5268
Alex Deuchera59781b2012-11-09 10:45:57 -05005269 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5270
5271 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5272 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5273 if (rdev->num_crtc >= 4) {
5274 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5275 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5276 }
5277 if (rdev->num_crtc >= 6) {
5278 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5279 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5280 }
5281
5282 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5283 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5284 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5285 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5286 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5287 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5288
5289 return 0;
5290}
5291
5292/**
5293 * cik_irq_ack - ack interrupt sources
5294 *
5295 * @rdev: radeon_device pointer
5296 *
5297 * Ack interrupt sources on the GPU (vblanks, hpd,
5298 * etc.) (CIK). Certain interrupt sources are sw
5299 * generated and do not require an explicit ack.
5300 */
5301static inline void cik_irq_ack(struct radeon_device *rdev)
5302{
5303 u32 tmp;
5304
5305 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5306 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5307 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5308 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5309 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5310 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5311 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5312
5313 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5314 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5315 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5316 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5317 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5318 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5319 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5320 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5321
5322 if (rdev->num_crtc >= 4) {
5323 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5324 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5325 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5326 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5327 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5328 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5329 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5330 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5331 }
5332
5333 if (rdev->num_crtc >= 6) {
5334 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5335 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5336 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5337 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5338 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5339 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5340 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5341 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5342 }
5343
5344 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5345 tmp = RREG32(DC_HPD1_INT_CONTROL);
5346 tmp |= DC_HPDx_INT_ACK;
5347 WREG32(DC_HPD1_INT_CONTROL, tmp);
5348 }
5349 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5350 tmp = RREG32(DC_HPD2_INT_CONTROL);
5351 tmp |= DC_HPDx_INT_ACK;
5352 WREG32(DC_HPD2_INT_CONTROL, tmp);
5353 }
5354 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5355 tmp = RREG32(DC_HPD3_INT_CONTROL);
5356 tmp |= DC_HPDx_INT_ACK;
5357 WREG32(DC_HPD3_INT_CONTROL, tmp);
5358 }
5359 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5360 tmp = RREG32(DC_HPD4_INT_CONTROL);
5361 tmp |= DC_HPDx_INT_ACK;
5362 WREG32(DC_HPD4_INT_CONTROL, tmp);
5363 }
5364 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5365 tmp = RREG32(DC_HPD5_INT_CONTROL);
5366 tmp |= DC_HPDx_INT_ACK;
5367 WREG32(DC_HPD5_INT_CONTROL, tmp);
5368 }
5369 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5370 tmp = RREG32(DC_HPD6_INT_CONTROL);
5371 tmp |= DC_HPDx_INT_ACK;
5372 WREG32(DC_HPD6_INT_CONTROL, tmp);
5373 }
5374}
5375
5376/**
5377 * cik_irq_disable - disable interrupts
5378 *
5379 * @rdev: radeon_device pointer
5380 *
5381 * Disable interrupts on the hw (CIK).
5382 */
5383static void cik_irq_disable(struct radeon_device *rdev)
5384{
5385 cik_disable_interrupts(rdev);
5386 /* Wait and acknowledge irq */
5387 mdelay(1);
5388 cik_irq_ack(rdev);
5389 cik_disable_interrupt_state(rdev);
5390}
5391
5392/**
5393 * cik_irq_suspend - disable interrupts for suspend
5394 *
5395 * @rdev: radeon_device pointer
5396 *
5397 * Disable interrupts and stop the RLC (CIK).
5398 * Used for suspend.
5399 */
5400static void cik_irq_suspend(struct radeon_device *rdev)
5401{
5402 cik_irq_disable(rdev);
5403 cik_rlc_stop(rdev);
5404}
5405
5406/**
5407 * cik_irq_fini - tear down interrupt support
5408 *
5409 * @rdev: radeon_device pointer
5410 *
5411 * Disable interrupts on the hw and free the IH ring
5412 * buffer (CIK).
5413 * Used for driver unload.
5414 */
5415static void cik_irq_fini(struct radeon_device *rdev)
5416{
5417 cik_irq_suspend(rdev);
5418 r600_ih_ring_fini(rdev);
5419}
5420
5421/**
5422 * cik_get_ih_wptr - get the IH ring buffer wptr
5423 *
5424 * @rdev: radeon_device pointer
5425 *
5426 * Get the IH ring buffer wptr from either the register
5427 * or the writeback memory buffer (CIK). Also check for
5428 * ring buffer overflow and deal with it.
5429 * Used by cik_irq_process().
5430 * Returns the value of the wptr.
5431 */
5432static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5433{
5434 u32 wptr, tmp;
5435
5436 if (rdev->wb.enabled)
5437 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5438 else
5439 wptr = RREG32(IH_RB_WPTR);
5440
5441 if (wptr & RB_OVERFLOW) {
5442 /* When a ring buffer overflow happens, start parsing interrupts
5443 * from the last not-overwritten vector (wptr + 16). Hopefully
5444 * this should allow us to catch up.
5445 */
5446 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5447 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5448 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5449 tmp = RREG32(IH_RB_CNTL);
5450 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5451 WREG32(IH_RB_CNTL, tmp);
5452 }
5453 return (wptr & rdev->ih.ptr_mask);
5454}
5455
5456/* CIK IV Ring
5457 * Each IV ring entry is 128 bits:
5458 * [7:0] - interrupt source id
5459 * [31:8] - reserved
5460 * [59:32] - interrupt source data
5461 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04005462 * [71:64] - RINGID
5463 * CP:
5464 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05005465 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5466 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5467 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5468 * PIPE_ID - ME0 0=3D
5469 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04005470 * SDMA:
5471 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5472 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5473 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05005474 * [79:72] - VMID
5475 * [95:80] - PASID
5476 * [127:96] - reserved
5477 */
5478/**
5479 * cik_irq_process - interrupt handler
5480 *
5481 * @rdev: radeon_device pointer
5482 *
5483 * Interrupt handler (CIK). Walk the IH ring,
5484 * ack interrupts and schedule work to handle
5485 * interrupt events.
5486 * Returns irq process return code.
5487 */
5488int cik_irq_process(struct radeon_device *rdev)
5489{
Alex Deucher2b0781a2013-04-09 14:26:16 -04005490 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5491 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05005492 u32 wptr;
5493 u32 rptr;
5494 u32 src_id, src_data, ring_id;
5495 u8 me_id, pipe_id, queue_id;
5496 u32 ring_index;
5497 bool queue_hotplug = false;
5498 bool queue_reset = false;
5499
5500 if (!rdev->ih.enabled || rdev->shutdown)
5501 return IRQ_NONE;
5502
5503 wptr = cik_get_ih_wptr(rdev);
5504
5505restart_ih:
5506 /* is somebody else already processing irqs? */
5507 if (atomic_xchg(&rdev->ih.lock, 1))
5508 return IRQ_NONE;
5509
5510 rptr = rdev->ih.rptr;
5511 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5512
5513 /* Order reading of wptr vs. reading of IH ring data */
5514 rmb();
5515
5516 /* display interrupts */
5517 cik_irq_ack(rdev);
5518
5519 while (rptr != wptr) {
5520 /* wptr/rptr are in bytes! */
5521 ring_index = rptr / 4;
5522 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5523 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5524 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
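/* each IV entry is 4 dwords; per the layout above, dword 0 carries src_id[7:0], dword 1 carries src_data[27:0] and dword 2 carries ring_id[7:0] */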
Alex Deuchera59781b2012-11-09 10:45:57 -05005525
5526 switch (src_id) {
5527 case 1: /* D1 vblank/vline */
5528 switch (src_data) {
5529 case 0: /* D1 vblank */
5530 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5531 if (rdev->irq.crtc_vblank_int[0]) {
5532 drm_handle_vblank(rdev->ddev, 0);
5533 rdev->pm.vblank_sync = true;
5534 wake_up(&rdev->irq.vblank_queue);
5535 }
5536 if (atomic_read(&rdev->irq.pflip[0]))
5537 radeon_crtc_handle_flip(rdev, 0);
5538 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5539 DRM_DEBUG("IH: D1 vblank\n");
5540 }
5541 break;
5542 case 1: /* D1 vline */
5543 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5544 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5545 DRM_DEBUG("IH: D1 vline\n");
5546 }
5547 break;
5548 default:
5549 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5550 break;
5551 }
5552 break;
5553 case 2: /* D2 vblank/vline */
5554 switch (src_data) {
5555 case 0: /* D2 vblank */
5556 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5557 if (rdev->irq.crtc_vblank_int[1]) {
5558 drm_handle_vblank(rdev->ddev, 1);
5559 rdev->pm.vblank_sync = true;
5560 wake_up(&rdev->irq.vblank_queue);
5561 }
5562 if (atomic_read(&rdev->irq.pflip[1]))
5563 radeon_crtc_handle_flip(rdev, 1);
5564 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5565 DRM_DEBUG("IH: D2 vblank\n");
5566 }
5567 break;
5568 case 1: /* D2 vline */
5569 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5570 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5571 DRM_DEBUG("IH: D2 vline\n");
5572 }
5573 break;
5574 default:
5575 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5576 break;
5577 }
5578 break;
5579 case 3: /* D3 vblank/vline */
5580 switch (src_data) {
5581 case 0: /* D3 vblank */
5582 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5583 if (rdev->irq.crtc_vblank_int[2]) {
5584 drm_handle_vblank(rdev->ddev, 2);
5585 rdev->pm.vblank_sync = true;
5586 wake_up(&rdev->irq.vblank_queue);
5587 }
5588 if (atomic_read(&rdev->irq.pflip[2]))
5589 radeon_crtc_handle_flip(rdev, 2);
5590 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5591 DRM_DEBUG("IH: D3 vblank\n");
5592 }
5593 break;
5594 case 1: /* D3 vline */
5595 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5596 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5597 DRM_DEBUG("IH: D3 vline\n");
5598 }
5599 break;
5600 default:
5601 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5602 break;
5603 }
5604 break;
5605 case 4: /* D4 vblank/vline */
5606 switch (src_data) {
5607 case 0: /* D4 vblank */
5608 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5609 if (rdev->irq.crtc_vblank_int[3]) {
5610 drm_handle_vblank(rdev->ddev, 3);
5611 rdev->pm.vblank_sync = true;
5612 wake_up(&rdev->irq.vblank_queue);
5613 }
5614 if (atomic_read(&rdev->irq.pflip[3]))
5615 radeon_crtc_handle_flip(rdev, 3);
5616 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5617 DRM_DEBUG("IH: D4 vblank\n");
5618 }
5619 break;
5620 case 1: /* D4 vline */
5621 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5622 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5623 DRM_DEBUG("IH: D4 vline\n");
5624 }
5625 break;
5626 default:
5627 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5628 break;
5629 }
5630 break;
5631 case 5: /* D5 vblank/vline */
5632 switch (src_data) {
5633 case 0: /* D5 vblank */
5634 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5635 if (rdev->irq.crtc_vblank_int[4]) {
5636 drm_handle_vblank(rdev->ddev, 4);
5637 rdev->pm.vblank_sync = true;
5638 wake_up(&rdev->irq.vblank_queue);
5639 }
5640 if (atomic_read(&rdev->irq.pflip[4]))
5641 radeon_crtc_handle_flip(rdev, 4);
5642 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5643 DRM_DEBUG("IH: D5 vblank\n");
5644 }
5645 break;
5646 case 1: /* D5 vline */
5647 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5648 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5649 DRM_DEBUG("IH: D5 vline\n");
5650 }
5651 break;
5652 default:
5653 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5654 break;
5655 }
5656 break;
5657 case 6: /* D6 vblank/vline */
5658 switch (src_data) {
5659 case 0: /* D6 vblank */
5660 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5661 if (rdev->irq.crtc_vblank_int[5]) {
5662 drm_handle_vblank(rdev->ddev, 5);
5663 rdev->pm.vblank_sync = true;
5664 wake_up(&rdev->irq.vblank_queue);
5665 }
5666 if (atomic_read(&rdev->irq.pflip[5]))
5667 radeon_crtc_handle_flip(rdev, 5);
5668 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5669 DRM_DEBUG("IH: D6 vblank\n");
5670 }
5671 break;
5672 case 1: /* D6 vline */
5673 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5674 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5675 DRM_DEBUG("IH: D6 vline\n");
5676 }
5677 break;
5678 default:
5679 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5680 break;
5681 }
5682 break;
5683 case 42: /* HPD hotplug */
5684 switch (src_data) {
5685 case 0:
5686 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5687 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5688 queue_hotplug = true;
5689 DRM_DEBUG("IH: HPD1\n");
5690 }
5691 break;
5692 case 1:
5693 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5694 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5695 queue_hotplug = true;
5696 DRM_DEBUG("IH: HPD2\n");
5697 }
5698 break;
5699 case 2:
5700 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5701 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5702 queue_hotplug = true;
5703 DRM_DEBUG("IH: HPD3\n");
5704 }
5705 break;
5706 case 3:
5707 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5708 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5709 queue_hotplug = true;
5710 DRM_DEBUG("IH: HPD4\n");
5711 }
5712 break;
5713 case 4:
5714 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5715 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5716 queue_hotplug = true;
5717 DRM_DEBUG("IH: HPD5\n");
5718 }
5719 break;
5720 case 5:
5721 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5722 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5723 queue_hotplug = true;
5724 DRM_DEBUG("IH: HPD6\n");
5725 }
5726 break;
5727 default:
5728 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5729 break;
5730 }
5731 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04005732 case 146:
5733 case 147:
5734 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5735 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5736 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5737 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5738 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5739 /* reset addr and status */
5740 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5741 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005742 case 176: /* GFX RB CP_INT */
5743 case 177: /* GFX IB CP_INT */
5744 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5745 break;
5746 case 181: /* CP EOP event */
5747 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005748 /* XXX check the bitfield order! */
5749 me_id = (ring_id & 0x60) >> 5;
5750 pipe_id = (ring_id & 0x18) >> 3;
5751 queue_id = (ring_id & 0x7) >> 0;
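/* this decode assumes the RINGID layout documented above: ME_ID in bits 6:5, PIPE_ID in bits 4:3, QUEUE_ID in bits 2:0 */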
Alex Deuchera59781b2012-11-09 10:45:57 -05005752 switch (me_id) {
5753 case 0:
5754 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5755 break;
5756 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05005757 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04005758 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5759 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5760 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5761 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05005762 break;
5763 }
5764 break;
5765 case 184: /* CP Privileged reg access */
5766 DRM_ERROR("Illegal register access in command stream\n");
5767 /* XXX check the bitfield order! */
5768 me_id = (ring_id & 0x60) >> 5;
5769 pipe_id = (ring_id & 0x18) >> 3;
5770 queue_id = (ring_id & 0x7) >> 0;
5771 switch (me_id) {
5772 case 0:
5773 /* This results in a full GPU reset, but all we need to do is soft
5774 * reset the CP for gfx
5775 */
5776 queue_reset = true;
5777 break;
5778 case 1:
5779 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005780 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005781 break;
5782 case 2:
5783 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005784 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005785 break;
5786 }
5787 break;
5788 case 185: /* CP Privileged inst */
5789 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005790 /* XXX check the bitfield order! */
5791 me_id = (ring_id & 0x60) >> 5;
5792 pipe_id = (ring_id & 0x18) >> 3;
5793 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005794 switch (me_id) {
5795 case 0:
5796 /* This results in a full GPU reset, but all we need to do is soft
5797 * reset the CP for gfx
5798 */
5799 queue_reset = true;
5800 break;
5801 case 1:
5802 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005803 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005804 break;
5805 case 2:
5806 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005807 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005808 break;
5809 }
5810 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04005811 case 224: /* SDMA trap event */
5812 /* XXX check the bitfield order! */
5813 me_id = (ring_id & 0x3) >> 0;
5814 queue_id = (ring_id & 0xc) >> 2;
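/* SDMA RINGID decoding per the layout above: INSTANCE_ID (sdma0/sdma1) in bits 1:0, QUEUE_ID in bits 3:2 */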
5815 DRM_DEBUG("IH: SDMA trap\n");
5816 switch (me_id) {
5817 case 0:
5818 switch (queue_id) {
5819 case 0:
5820 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5821 break;
5822 case 1:
5823 /* XXX compute */
5824 break;
5825 case 2:
5826 /* XXX compute */
5827 break;
5828 }
5829 break;
5830 case 1:
5831 switch (queue_id) {
5832 case 0:
5833 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5834 break;
5835 case 1:
5836 /* XXX compute */
5837 break;
5838 case 2:
5839 /* XXX compute */
5840 break;
5841 }
5842 break;
5843 }
5844 break;
5845 case 241: /* SDMA Privileged inst */
5846 case 247: /* SDMA Privileged inst */
5847 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5848 /* XXX check the bitfield order! */
5849 me_id = (ring_id & 0x3) >> 0;
5850 queue_id = (ring_id & 0xc) >> 2;
5851 switch (me_id) {
5852 case 0:
5853 switch (queue_id) {
5854 case 0:
5855 queue_reset = true;
5856 break;
5857 case 1:
5858 /* XXX compute */
5859 queue_reset = true;
5860 break;
5861 case 2:
5862 /* XXX compute */
5863 queue_reset = true;
5864 break;
5865 }
5866 break;
5867 case 1:
5868 switch (queue_id) {
5869 case 0:
5870 queue_reset = true;
5871 break;
5872 case 1:
5873 /* XXX compute */
5874 queue_reset = true;
5875 break;
5876 case 2:
5877 /* XXX compute */
5878 queue_reset = true;
5879 break;
5880 }
5881 break;
5882 }
5883 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005884 case 233: /* GUI IDLE */
5885 DRM_DEBUG("IH: GUI idle\n");
5886 break;
5887 default:
5888 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5889 break;
5890 }
5891
5892 /* wptr/rptr are in bytes! */
5893 rptr += 16;
5894 rptr &= rdev->ih.ptr_mask;
5895 }
5896 if (queue_hotplug)
5897 schedule_work(&rdev->hotplug_work);
5898 if (queue_reset)
5899 schedule_work(&rdev->reset_work);
5900 rdev->ih.rptr = rptr;
5901 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5902 atomic_set(&rdev->ih.lock, 0);
5903
5904 /* make sure wptr hasn't changed while processing */
5905 wptr = cik_get_ih_wptr(rdev);
5906 if (wptr != rptr)
5907 goto restart_ih;
5908
5909 return IRQ_HANDLED;
5910}
Alex Deucher7bf94a22012-08-17 11:48:29 -04005911
5912/*
5913 * startup/shutdown callbacks
5914 */
5915/**
5916 * cik_startup - program the asic to a functional state
5917 *
5918 * @rdev: radeon_device pointer
5919 *
5920 * Programs the asic to a functional state (CIK).
5921 * Called by cik_init() and cik_resume().
5922 * Returns 0 for success, error for failure.
5923 */
5924static int cik_startup(struct radeon_device *rdev)
5925{
5926 struct radeon_ring *ring;
5927 int r;
5928
5929 if (rdev->flags & RADEON_IS_IGP) {
5930 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5931 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5932 r = cik_init_microcode(rdev);
5933 if (r) {
5934 DRM_ERROR("Failed to load firmware!\n");
5935 return r;
5936 }
5937 }
5938 } else {
5939 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5940 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5941 !rdev->mc_fw) {
5942 r = cik_init_microcode(rdev);
5943 if (r) {
5944 DRM_ERROR("Failed to load firmware!\n");
5945 return r;
5946 }
5947 }
5948
5949 r = ci_mc_load_microcode(rdev);
5950 if (r) {
5951 DRM_ERROR("Failed to load MC firmware!\n");
5952 return r;
5953 }
5954 }
5955
5956 r = r600_vram_scratch_init(rdev);
5957 if (r)
5958 return r;
5959
5960 cik_mc_program(rdev);
5961 r = cik_pcie_gart_enable(rdev);
5962 if (r)
5963 return r;
5964 cik_gpu_init(rdev);
5965
5966 /* allocate rlc buffers */
5967 r = si_rlc_init(rdev);
5968 if (r) {
5969 DRM_ERROR("Failed to init rlc BOs!\n");
5970 return r;
5971 }
5972
5973 /* allocate wb buffer */
5974 r = radeon_wb_init(rdev);
5975 if (r)
5976 return r;
5977
Alex Deucher963e81f2013-06-26 17:37:11 -04005978 /* allocate mec buffers */
5979 r = cik_mec_init(rdev);
5980 if (r) {
5981 DRM_ERROR("Failed to init MEC BOs!\n");
5982 return r;
5983 }
5984
Alex Deucher7bf94a22012-08-17 11:48:29 -04005985 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5986 if (r) {
5987 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5988 return r;
5989 }
5990
Alex Deucher963e81f2013-06-26 17:37:11 -04005991 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5992 if (r) {
5993 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5994 return r;
5995 }
5996
5997 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5998 if (r) {
5999 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6000 return r;
6001 }
6002
Alex Deucher7bf94a22012-08-17 11:48:29 -04006003 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6004 if (r) {
6005 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6006 return r;
6007 }
6008
6009 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6010 if (r) {
6011 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6012 return r;
6013 }
6014
Christian König87167bb2013-04-09 13:39:21 -04006015 r = cik_uvd_resume(rdev);
6016 if (!r) {
6017 r = radeon_fence_driver_start_ring(rdev,
6018 R600_RING_TYPE_UVD_INDEX);
6019 if (r)
6020 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6021 }
6022 if (r)
6023 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6024
Alex Deucher7bf94a22012-08-17 11:48:29 -04006025 /* Enable IRQ */
6026 if (!rdev->irq.installed) {
6027 r = radeon_irq_kms_init(rdev);
6028 if (r)
6029 return r;
6030 }
6031
6032 r = cik_irq_init(rdev);
6033 if (r) {
6034 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6035 radeon_irq_kms_fini(rdev);
6036 return r;
6037 }
6038 cik_irq_set(rdev);
6039
6040 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6041 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6042 CP_RB0_RPTR, CP_RB0_WPTR,
6043 0, 0xfffff, RADEON_CP_PACKET2);
6044 if (r)
6045 return r;
6046
Alex Deucher963e81f2013-06-26 17:37:11 -04006047 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04006048 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006049 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6050 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6051 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006052 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006053 if (r)
6054 return r;
6055 ring->me = 1; /* first MEC */
6056 ring->pipe = 0; /* first pipe */
6057 ring->queue = 0; /* first queue */
6058 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6059
Alex Deucher2615b532013-06-03 11:21:58 -04006060 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006061 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6062 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6063 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006064 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006065 if (r)
6066 return r;
6067 /* dGPUs only have 1 MEC */
6068 ring->me = 1; /* first MEC */
6069 ring->pipe = 0; /* first pipe */
6070 ring->queue = 1; /* second queue */
6071 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6072
Alex Deucher7bf94a22012-08-17 11:48:29 -04006073 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6074 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6075 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6076 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6077 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6078 if (r)
6079 return r;
6080
6081 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6082 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6083 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6084 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6085 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6086 if (r)
6087 return r;
6088
6089 r = cik_cp_resume(rdev);
6090 if (r)
6091 return r;
6092
6093 r = cik_sdma_resume(rdev);
6094 if (r)
6095 return r;
6096
Christian König87167bb2013-04-09 13:39:21 -04006097 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6098 if (ring->ring_size) {
6099 r = radeon_ring_init(rdev, ring, ring->ring_size,
6100 R600_WB_UVD_RPTR_OFFSET,
6101 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6102 0, 0xfffff, RADEON_CP_PACKET2);
6103 if (!r)
6104 r = r600_uvd_init(rdev);
6105 if (r)
6106 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6107 }
6108
Alex Deucher7bf94a22012-08-17 11:48:29 -04006109 r = radeon_ib_pool_init(rdev);
6110 if (r) {
6111 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6112 return r;
6113 }
6114
6115 r = radeon_vm_manager_init(rdev);
6116 if (r) {
6117 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6118 return r;
6119 }
6120
6121 return 0;
6122}
6123
6124/**
6125 * cik_resume - resume the asic to a functional state
6126 *
6127 * @rdev: radeon_device pointer
6128 *
6129 * Programs the asic to a functional state (CIK).
6130 * Called at resume.
6131 * Returns 0 for success, error for failure.
6132 */
6133int cik_resume(struct radeon_device *rdev)
6134{
6135 int r;
6136
6137 /* post card */
6138 atom_asic_init(rdev->mode_info.atom_context);
6139
Alex Deucher0aafd312013-04-09 14:43:30 -04006140 /* init golden registers */
6141 cik_init_golden_registers(rdev);
6142
Alex Deucher7bf94a22012-08-17 11:48:29 -04006143 rdev->accel_working = true;
6144 r = cik_startup(rdev);
6145 if (r) {
6146 DRM_ERROR("cik startup failed on resume\n");
6147 rdev->accel_working = false;
6148 return r;
6149 }
6150
6151 return r;
6152
6153}
6154
6155/**
6156 * cik_suspend - suspend the asic
6157 *
6158 * @rdev: radeon_device pointer
6159 *
6160 * Bring the chip into a state suitable for suspend (CIK).
6161 * Called at suspend.
6162 * Returns 0 for success.
6163 */
6164int cik_suspend(struct radeon_device *rdev)
6165{
6166 radeon_vm_manager_fini(rdev);
6167 cik_cp_enable(rdev, false);
6168 cik_sdma_enable(rdev, false);
Christian König87167bb2013-04-09 13:39:21 -04006169 r600_uvd_rbc_stop(rdev);
6170 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006171 cik_irq_suspend(rdev);
6172 radeon_wb_disable(rdev);
6173 cik_pcie_gart_disable(rdev);
6174 return 0;
6175}
6176
6177 /* The plan is to move initialization into this function and use
6178 * helper functions so that radeon_device_init does little more
6179 * than call asic-specific functions. This should also allow us
6180 * to remove a bunch of callback functions
6181 * like vram_info.
6182 */
6183/**
6184 * cik_init - asic specific driver and hw init
6185 *
6186 * @rdev: radeon_device pointer
6187 *
6188 * Setup asic specific driver variables and program the hw
6189 * to a functional state (CIK).
6190 * Called at driver startup.
6191 * Returns 0 for success, errors for failure.
6192 */
6193int cik_init(struct radeon_device *rdev)
6194{
6195 struct radeon_ring *ring;
6196 int r;
6197
6198 /* Read BIOS */
6199 if (!radeon_get_bios(rdev)) {
6200 if (ASIC_IS_AVIVO(rdev))
6201 return -EINVAL;
6202 }
6203 /* Must be an ATOMBIOS */
6204 if (!rdev->is_atom_bios) {
6205 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6206 return -EINVAL;
6207 }
6208 r = radeon_atombios_init(rdev);
6209 if (r)
6210 return r;
6211
6212 /* Post card if necessary */
6213 if (!radeon_card_posted(rdev)) {
6214 if (!rdev->bios) {
6215 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6216 return -EINVAL;
6217 }
6218 DRM_INFO("GPU not posted. posting now...\n");
6219 atom_asic_init(rdev->mode_info.atom_context);
6220 }
Alex Deucher0aafd312013-04-09 14:43:30 -04006221 /* init golden registers */
6222 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006223 /* Initialize scratch registers */
6224 cik_scratch_init(rdev);
6225 /* Initialize surface registers */
6226 radeon_surface_init(rdev);
6227 /* Initialize clocks */
6228 radeon_get_clock_info(rdev->ddev);
6229
6230 /* Fence driver */
6231 r = radeon_fence_driver_init(rdev);
6232 if (r)
6233 return r;
6234
6235 /* initialize memory controller */
6236 r = cik_mc_init(rdev);
6237 if (r)
6238 return r;
6239 /* Memory manager */
6240 r = radeon_bo_init(rdev);
6241 if (r)
6242 return r;
6243
6244 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6245 ring->ring_obj = NULL;
6246 r600_ring_init(rdev, ring, 1024 * 1024);
6247
Alex Deucher963e81f2013-06-26 17:37:11 -04006248 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6249 ring->ring_obj = NULL;
6250 r600_ring_init(rdev, ring, 1024 * 1024);
6251 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6252 if (r)
6253 return r;
6254
6255 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6256 ring->ring_obj = NULL;
6257 r600_ring_init(rdev, ring, 1024 * 1024);
6258 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6259 if (r)
6260 return r;
6261
Alex Deucher7bf94a22012-08-17 11:48:29 -04006262 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6263 ring->ring_obj = NULL;
6264 r600_ring_init(rdev, ring, 256 * 1024);
6265
6266 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6267 ring->ring_obj = NULL;
6268 r600_ring_init(rdev, ring, 256 * 1024);
6269
Christian König87167bb2013-04-09 13:39:21 -04006270 r = radeon_uvd_init(rdev);
6271 if (!r) {
6272 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6273 ring->ring_obj = NULL;
6274 r600_ring_init(rdev, ring, 4096);
6275 }
6276
Alex Deucher7bf94a22012-08-17 11:48:29 -04006277 rdev->ih.ring_obj = NULL;
6278 r600_ih_ring_init(rdev, 64 * 1024);
6279
6280 r = r600_pcie_gart_init(rdev);
6281 if (r)
6282 return r;
6283
6284 rdev->accel_working = true;
6285 r = cik_startup(rdev);
6286 if (r) {
6287 dev_err(rdev->dev, "disabling GPU acceleration\n");
6288 cik_cp_fini(rdev);
6289 cik_sdma_fini(rdev);
6290 cik_irq_fini(rdev);
6291 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006292 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006293 radeon_wb_fini(rdev);
6294 radeon_ib_pool_fini(rdev);
6295 radeon_vm_manager_fini(rdev);
6296 radeon_irq_kms_fini(rdev);
6297 cik_pcie_gart_fini(rdev);
6298 rdev->accel_working = false;
6299 }
6300
6301 /* Don't start up if the MC ucode is missing.
6302 * The default clocks and voltages before the MC ucode
6303 * is loaded are not sufficient for advanced operations.
6304 */
6305 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6306 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6307 return -EINVAL;
6308 }
6309
6310 return 0;
6311}
6312
6313/**
6314 * cik_fini - asic specific driver and hw fini
6315 *
6316 * @rdev: radeon_device pointer
6317 *
6318 * Tear down the asic specific driver variables and program the hw
6319 * to an idle state (CIK).
6320 * Called at driver unload.
6321 */
6322void cik_fini(struct radeon_device *rdev)
6323{
6324 cik_cp_fini(rdev);
6325 cik_sdma_fini(rdev);
6326 cik_irq_fini(rdev);
6327 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006328 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006329 radeon_wb_fini(rdev);
6330 radeon_vm_manager_fini(rdev);
6331 radeon_ib_pool_fini(rdev);
6332 radeon_irq_kms_fini(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006333 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006334 cik_pcie_gart_fini(rdev);
6335 r600_vram_scratch_fini(rdev);
6336 radeon_gem_fini(rdev);
6337 radeon_fence_driver_fini(rdev);
6338 radeon_bo_fini(rdev);
6339 radeon_atombios_fini(rdev);
6340 kfree(rdev->bios);
6341 rdev->bios = NULL;
6342}
Alex Deuchercd84a272012-07-20 17:13:13 -04006343
6344/* display watermark setup */
6345/**
6346 * dce8_line_buffer_adjust - Set up the line buffer
6347 *
6348 * @rdev: radeon_device pointer
6349 * @radeon_crtc: the selected display controller
6350 * @mode: the current display mode on the selected display
6351 * controller
6352 *
6353 * Setup up the line buffer allocation for
6354 * the selected display controller (CIK).
6355 * Returns the line buffer size in pixels.
6356 */
6357static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6358 struct radeon_crtc *radeon_crtc,
6359 struct drm_display_mode *mode)
6360{
6361 u32 tmp;
6362
6363 /*
6364 * Line Buffer Setup
6365 * There are 6 line buffers, one for each display controller.
6366 * There are 3 partitions per LB. Select the number of partitions
6367 * to enable based on the display width. For display widths larger
6368 * than 4096, you need to use 2 display controllers and combine
6369 * them using the stereo blender.
6370 */
6371 if (radeon_crtc->base.enabled && mode) {
6372 if (mode->crtc_hdisplay < 1920)
6373 tmp = 1;
6374 else if (mode->crtc_hdisplay < 2560)
6375 tmp = 2;
6376 else if (mode->crtc_hdisplay < 4096)
6377 tmp = 0;
6378 else {
6379 DRM_DEBUG_KMS("Mode too big for LB!\n");
6380 tmp = 0;
6381 }
6382 } else
6383 tmp = 1;
6384
6385 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6386 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6387
6388 if (radeon_crtc->base.enabled && mode) {
6389 switch (tmp) {
6390 case 0:
6391 default:
6392 return 4096 * 2;
6393 case 1:
6394 return 1920 * 2;
6395 case 2:
6396 return 2560 * 2;
6397 }
6398 }
6399
6400 /* controller not enabled, so no lb used */
6401 return 0;
6402}
6403
6404/**
6405 * cik_get_number_of_dram_channels - get the number of dram channels
6406 *
6407 * @rdev: radeon_device pointer
6408 *
6409 * Look up the number of video ram channels (CIK).
6410 * Used for display watermark bandwidth calculations
6411 * Returns the number of dram channels
6412 */
6413static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6414{
6415 u32 tmp = RREG32(MC_SHARED_CHMAP);
6416
6417 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6418 case 0:
6419 default:
6420 return 1;
6421 case 1:
6422 return 2;
6423 case 2:
6424 return 4;
6425 case 3:
6426 return 8;
6427 case 4:
6428 return 3;
6429 case 5:
6430 return 6;
6431 case 6:
6432 return 10;
6433 case 7:
6434 return 12;
6435 case 8:
6436 return 16;
6437 }
6438}
6439
6440struct dce8_wm_params {
6441 u32 dram_channels; /* number of dram channels */
6442 u32 yclk; /* bandwidth per dram data pin in kHz */
6443 u32 sclk; /* engine clock in kHz */
6444 u32 disp_clk; /* display clock in kHz */
6445 u32 src_width; /* viewport width */
6446 u32 active_time; /* active display time in ns */
6447 u32 blank_time; /* blank time in ns */
6448 bool interlaced; /* mode is interlaced */
6449 fixed20_12 vsc; /* vertical scale ratio */
6450 u32 num_heads; /* number of active crtcs */
6451 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6452 u32 lb_size; /* line buffer allocated to pipe */
6453 u32 vtaps; /* vertical scaler taps */
6454};
6455
6456/**
6457 * dce8_dram_bandwidth - get the dram bandwidth
6458 *
6459 * @wm: watermark calculation data
6460 *
6461 * Calculate the raw dram bandwidth (CIK).
6462 * Used for display watermark bandwidth calculations
6463 * Returns the dram bandwidth in MBytes/s
6464 */
6465static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6466{
6467 /* Calculate raw DRAM Bandwidth */
6468 fixed20_12 dram_efficiency; /* 0.7 */
6469 fixed20_12 yclk, dram_channels, bandwidth;
6470 fixed20_12 a;
6471
6472 a.full = dfixed_const(1000);
6473 yclk.full = dfixed_const(wm->yclk);
6474 yclk.full = dfixed_div(yclk, a);
6475 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6476 a.full = dfixed_const(10);
6477 dram_efficiency.full = dfixed_const(7);
6478 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6479 bandwidth.full = dfixed_mul(dram_channels, yclk);
6480 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
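/* result in MBytes/s: yclk (MHz) * dram_channels * 4 * 0.7 dram efficiency */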
6481
6482 return dfixed_trunc(bandwidth);
6483}
6484
6485/**
6486 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6487 *
6488 * @wm: watermark calculation data
6489 *
6490 * Calculate the dram bandwidth used for display (CIK).
6491 * Used for display watermark bandwidth calculations
6492 * Returns the dram bandwidth for display in MBytes/s
6493 */
6494static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6495{
6496 /* Calculate DRAM Bandwidth and the part allocated to display. */
6497 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6498 fixed20_12 yclk, dram_channels, bandwidth;
6499 fixed20_12 a;
6500
6501 a.full = dfixed_const(1000);
6502 yclk.full = dfixed_const(wm->yclk);
6503 yclk.full = dfixed_div(yclk, a);
6504 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6505 a.full = dfixed_const(10);
6506 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6507 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6508 bandwidth.full = dfixed_mul(dram_channels, yclk);
6509 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6510
6511 return dfixed_trunc(bandwidth);
6512}
6513
6514/**
6515 * dce8_data_return_bandwidth - get the data return bandwidth
6516 *
6517 * @wm: watermark calculation data
6518 *
6519 * Calculate the data return bandwidth used for display (CIK).
6520 * Used for display watermark bandwidth calculations
6521 * Returns the data return bandwidth in MBytes/s
6522 */
6523static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6524{
6525 /* Calculate the display Data return Bandwidth */
6526 fixed20_12 return_efficiency; /* 0.8 */
6527 fixed20_12 sclk, bandwidth;
6528 fixed20_12 a;
6529
6530 a.full = dfixed_const(1000);
6531 sclk.full = dfixed_const(wm->sclk);
6532 sclk.full = dfixed_div(sclk, a);
6533 a.full = dfixed_const(10);
6534 return_efficiency.full = dfixed_const(8);
6535 return_efficiency.full = dfixed_div(return_efficiency, a);
6536 a.full = dfixed_const(32);
6537 bandwidth.full = dfixed_mul(a, sclk);
6538 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
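/* result in MBytes/s: sclk (MHz) * 32 * 0.8 return efficiency */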
6539
6540 return dfixed_trunc(bandwidth);
6541}
6542
6543/**
6544 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6545 *
6546 * @wm: watermark calculation data
6547 *
6548 * Calculate the dmif bandwidth used for display (CIK).
6549 * Used for display watermark bandwidth calculations
6550 * Returns the dmif bandwidth in MBytes/s
6551 */
6552static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6553{
6554 /* Calculate the DMIF Request Bandwidth */
6555 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6556 fixed20_12 disp_clk, bandwidth;
6557 fixed20_12 a, b;
6558
6559 a.full = dfixed_const(1000);
6560 disp_clk.full = dfixed_const(wm->disp_clk);
6561 disp_clk.full = dfixed_div(disp_clk, a);
6562 a.full = dfixed_const(32);
6563 b.full = dfixed_mul(a, disp_clk);
6564
6565 a.full = dfixed_const(10);
6566 disp_clk_request_efficiency.full = dfixed_const(8);
6567 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6568
6569 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6570
6571 return dfixed_trunc(bandwidth);
6572}
6573
6574/**
6575 * dce8_available_bandwidth - get the min available bandwidth
6576 *
6577 * @wm: watermark calculation data
6578 *
6579 * Calculate the min available bandwidth used for display (CIK).
6580 * Used for display watermark bandwidth calculations
6581 * Returns the min available bandwidth in MBytes/s
6582 */
6583static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6584{
6585 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6586 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6587 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6588 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6589
6590 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6591}
6592
6593/**
6594 * dce8_average_bandwidth - get the average available bandwidth
6595 *
6596 * @wm: watermark calculation data
6597 *
6598 * Calculate the average available bandwidth used for display (CIK).
6599 * Used for display watermark bandwidth calculations
6600 * Returns the average available bandwidth in MBytes/s
6601 */
6602static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6603{
6604 /* Calculate the display mode Average Bandwidth
6605 * DisplayMode should contain the source and destination dimensions,
6606 * timing, etc.
6607 */
6608 fixed20_12 bpp;
6609 fixed20_12 line_time;
6610 fixed20_12 src_width;
6611 fixed20_12 bandwidth;
6612 fixed20_12 a;
6613
6614 a.full = dfixed_const(1000);
6615 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6616 line_time.full = dfixed_div(line_time, a);
6617 bpp.full = dfixed_const(wm->bytes_per_pixel);
6618 src_width.full = dfixed_const(wm->src_width);
6619 bandwidth.full = dfixed_mul(src_width, bpp);
6620 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6621 bandwidth.full = dfixed_div(bandwidth, line_time);
6622
6623 return dfixed_trunc(bandwidth);
6624}
6625
6626/**
6627 * dce8_latency_watermark - get the latency watermark
6628 *
6629 * @wm: watermark calculation data
6630 *
6631 * Calculate the latency watermark (CIK).
6632 * Used for display watermark bandwidth calculations
6633 * Returns the latency watermark in ns
6634 */
6635static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6636{
6637 /* First calculate the latency in ns */
6638 u32 mc_latency = 2000; /* 2000 ns. */
6639 u32 available_bandwidth = dce8_available_bandwidth(wm);
6640 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6641 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6642 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6643 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6644 (wm->num_heads * cursor_line_pair_return_time);
6645 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6646 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6647 u32 tmp, dmif_size = 12288;
6648 fixed20_12 a, b, c;
6649
6650 if (wm->num_heads == 0)
6651 return 0;
6652
6653 a.full = dfixed_const(2);
6654 b.full = dfixed_const(1);
6655 if ((wm->vsc.full > a.full) ||
6656 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6657 (wm->vtaps >= 5) ||
6658 ((wm->vsc.full >= a.full) && wm->interlaced))
6659 max_src_lines_per_dst_line = 4;
6660 else
6661 max_src_lines_per_dst_line = 2;
6662
6663 a.full = dfixed_const(available_bandwidth);
6664 b.full = dfixed_const(wm->num_heads);
6665 a.full = dfixed_div(a, b);
6666
6667 b.full = dfixed_const(mc_latency + 512);
6668 c.full = dfixed_const(wm->disp_clk);
6669 b.full = dfixed_div(b, c);
6670
6671 c.full = dfixed_const(dmif_size);
6672 b.full = dfixed_div(c, b);
6673
6674 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6675
6676 b.full = dfixed_const(1000);
6677 c.full = dfixed_const(wm->disp_clk);
6678 b.full = dfixed_div(c, b);
6679 c.full = dfixed_const(wm->bytes_per_pixel);
6680 b.full = dfixed_mul(b, c);
6681
6682 lb_fill_bw = min(tmp, dfixed_trunc(b));
6683
6684 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6685 b.full = dfixed_const(1000);
6686 c.full = dfixed_const(lb_fill_bw);
6687 b.full = dfixed_div(c, b);
6688 a.full = dfixed_div(a, b);
6689 line_fill_time = dfixed_trunc(a);
6690
6691 if (line_fill_time < wm->active_time)
6692 return latency;
6693 else
6694 return latency + (line_fill_time - wm->active_time);
6695
6696}
6697
6698/**
6699 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6700 * average and available dram bandwidth
6701 *
6702 * @wm: watermark calculation data
6703 *
6704 * Check if the display average bandwidth fits in the display
6705 * dram bandwidth (CIK).
6706 * Used for display watermark bandwidth calculations
6707 * Returns true if the display fits, false if not.
6708 */
6709static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6710{
6711 if (dce8_average_bandwidth(wm) <=
6712 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6713 return true;
6714 else
6715 return false;
6716}
6717
6718/**
6719 * dce8_average_bandwidth_vs_available_bandwidth - check
6720 * average and available bandwidth
6721 *
6722 * @wm: watermark calculation data
6723 *
6724 * Check if the display average bandwidth fits in the display
6725 * available bandwidth (CIK).
6726 * Used for display watermark bandwidth calculations
6727 * Returns true if the display fits, false if not.
6728 */
6729static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6730{
6731 if (dce8_average_bandwidth(wm) <=
6732 (dce8_available_bandwidth(wm) / wm->num_heads))
6733 return true;
6734 else
6735 return false;
6736}
6737
6738/**
6739 * dce8_check_latency_hiding - check latency hiding
6740 *
6741 * @wm: watermark calculation data
6742 *
6743 * Check latency hiding (CIK).
6744 * Used for display watermark bandwidth calculations
6745 * Returns true if the display fits, false if not.
6746 */
6747static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6748{
6749 u32 lb_partitions = wm->lb_size / wm->src_width;
6750 u32 line_time = wm->active_time + wm->blank_time;
6751 u32 latency_tolerant_lines;
6752 u32 latency_hiding;
6753 fixed20_12 a;
6754
6755 a.full = dfixed_const(1);
6756 if (wm->vsc.full > a.full)
6757 latency_tolerant_lines = 1;
6758 else {
6759 if (lb_partitions <= (wm->vtaps + 1))
6760 latency_tolerant_lines = 1;
6761 else
6762 latency_tolerant_lines = 2;
6763 }
6764
6765 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6766
6767 if (dce8_latency_watermark(wm) <= latency_hiding)
6768 return true;
6769 else
6770 return false;
6771}
6772
6773/**
6774 * dce8_program_watermarks - program display watermarks
6775 *
6776 * @rdev: radeon_device pointer
6777 * @radeon_crtc: the selected display controller
6778 * @lb_size: line buffer size
6779 * @num_heads: number of display controllers in use
6780 *
6781 * Calculate and program the display watermarks for the
6782 * selected display controller (CIK).
6783 */
6784static void dce8_program_watermarks(struct radeon_device *rdev,
6785 struct radeon_crtc *radeon_crtc,
6786 u32 lb_size, u32 num_heads)
6787{
6788 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6789 struct dce8_wm_params wm;
6790 u32 pixel_period;
6791 u32 line_time = 0;
6792 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6793 u32 tmp, wm_mask;
6794
6795 if (radeon_crtc->base.enabled && num_heads && mode) {
6796 pixel_period = 1000000 / (u32)mode->clock;
6797 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6798
6799 wm.yclk = rdev->pm.current_mclk * 10;
6800 wm.sclk = rdev->pm.current_sclk * 10;
6801 wm.disp_clk = mode->clock;
6802 wm.src_width = mode->crtc_hdisplay;
6803 wm.active_time = mode->crtc_hdisplay * pixel_period;
6804 wm.blank_time = line_time - wm.active_time;
6805 wm.interlaced = false;
6806 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6807 wm.interlaced = true;
6808 wm.vsc = radeon_crtc->vsc;
6809 wm.vtaps = 1;
6810 if (radeon_crtc->rmx_type != RMX_OFF)
6811 wm.vtaps = 2;
6812 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6813 wm.lb_size = lb_size;
6814 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6815 wm.num_heads = num_heads;
6816
6817 /* set for high clocks */
6818 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6819 /* set for low clocks */
6820 /* wm.yclk = low clk; wm.sclk = low clk */
		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce8_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

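	/* two watermark sets (A for high clocks, B for low clocks) are
	 * programmed through the same register pair; the mask control
	 * register selects which set is being updated, and the original
	 * selection is restored afterwards
	 */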
	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
}

/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

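	/* count the active heads first; the per-head bandwidth share used
	 * in the watermark checks depends on it
	 */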
	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}

/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
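	/* trigger a capture of the GPU clock counter, then read back the
	 * two 32-bit halves; the mutex keeps the capture/read sequence
	 * atomic with respect to other callers
	 */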
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}

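/**
 * cik_set_uvd_clock - program a single UVD clock (CIK)
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock frequency
 * @cntl_reg: SMC register holding the clock divider
 * @status_reg: SMC register reporting the divider status
 *
 * Looks up the ATOM clock dividers for the requested clock, programs the
 * post divider through @cntl_reg and waits for @status_reg to report the
 * change as complete.
 * Returns 0 on success, -ETIMEDOUT if the status bit never sets, or the
 * error returned by the divider lookup.
 */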
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

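	/* poll for up to ~1 second (100 x 10 ms) for the divider update to
	 * be reflected in the status register
	 */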
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

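/**
 * cik_set_uvd_clocks - set the UVD VCLK and DCLK (CIK)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK frequency
 * @dclk: requested DCLK frequency
 *
 * Programs the UVD VCLK and DCLK dividers in turn.
 * Returns 0 on success, error on failure.
 */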
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}

int cik_uvd_resume(struct radeon_device *rdev)
{
	uint64_t addr;
	uint32_t size;
	int r;

	r = radeon_uvd_resume(rdev);
	if (r)
		return r;

	/* program the VCPU memory controller bits 0-27 */
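	/* the VCPU cache windows are laid out back to back: firmware image,
	 * then stack, then heap; offsets and sizes are programmed in 8-byte
	 * units, hence the >> 3 shifts
	 */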
	addr = rdev->uvd.gpu_addr >> 3;
	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
	WREG32(UVD_VCPU_CACHE_SIZE0, size);

	addr += size;
	size = RADEON_UVD_STACK_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
	WREG32(UVD_VCPU_CACHE_SIZE1, size);

	addr += size;
	size = RADEON_UVD_HEAP_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
	WREG32(UVD_VCPU_CACHE_SIZE2, size);

	/* bits 28-31 */
	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));

	/* bits 32-39 */
	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));

	return 0;
}