/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"

/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* compute */
#define CIK_MEC_UCODE_SIZE 4192
/* interrupts */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* gddr controller */
#define CIK_MC_UCODE_SIZE 7866
/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

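/*
 * The *_UCODE_SIZE values above are dword counts; cik_init_microcode()
 * below validates each firmware image against SIZE * 4 bytes.
 */
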
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

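/*
 * The "golden" register tables below are applied by
 * radeon_program_register_sequence() in cik_init_golden_registers();
 * each table entry is a {register offset, mask, value} triple.
 */
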
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

107static const u32 bonaire_golden_common_registers[] =
108{
109 0xc770, 0xffffffff, 0x00000800,
110 0xc774, 0xffffffff, 0x00000800,
111 0xc798, 0xffffffff, 0x00007fbf,
112 0xc79c, 0xffffffff, 0x00007faf
113};
114
115static const u32 bonaire_golden_registers[] =
116{
117 0x3354, 0x00000333, 0x00000333,
118 0x3350, 0x000c0fc0, 0x00040200,
119 0x9a10, 0x00010000, 0x00058208,
120 0x3c000, 0xffff1fff, 0x00140000,
121 0x3c200, 0xfdfc0fff, 0x00000100,
122 0x3c234, 0x40000000, 0x40000200,
123 0x9830, 0xffffffff, 0x00000000,
124 0x9834, 0xf00fffff, 0x00000400,
125 0x9838, 0x0002021c, 0x00020200,
126 0xc78, 0x00000080, 0x00000000,
127 0x5bb0, 0x000000f0, 0x00000070,
128 0x5bc0, 0xf0311fff, 0x80300000,
129 0x98f8, 0x73773777, 0x12010001,
130 0x350c, 0x00810000, 0x408af000,
131 0x7030, 0x31000111, 0x00000011,
132 0x2f48, 0x73773777, 0x12010001,
133 0x220c, 0x00007fb6, 0x0021a1b1,
134 0x2210, 0x00007fb6, 0x002021b1,
135 0x2180, 0x00007fb6, 0x00002191,
136 0x2218, 0x00007fb6, 0x002121b1,
137 0x221c, 0x00007fb6, 0x002021b1,
138 0x21dc, 0x00007fb6, 0x00002191,
139 0x21e0, 0x00007fb6, 0x00002191,
140 0x3628, 0x0000003f, 0x0000000a,
141 0x362c, 0x0000003f, 0x0000000a,
142 0x2ae4, 0x00073ffe, 0x000022a2,
143 0x240c, 0x000007ff, 0x00000000,
144 0x8a14, 0xf000003f, 0x00000007,
145 0x8bf0, 0x00002001, 0x00000001,
146 0x8b24, 0xffffffff, 0x00ffffff,
147 0x30a04, 0x0000ff0f, 0x00000000,
148 0x28a4c, 0x07ffffff, 0x06000000,
149 0x4d8, 0x00000fff, 0x00000100,
150 0x3e78, 0x00000001, 0x00000002,
151 0x9100, 0x03000000, 0x0362c688,
152 0x8c00, 0x000000ff, 0x00000001,
153 0xe40, 0x00001fff, 0x00001fff,
154 0x9060, 0x0000007f, 0x00000020,
155 0x9508, 0x00010000, 0x00010000,
156 0xac14, 0x000003ff, 0x000000f3,
157 0xac0c, 0xffffffff, 0x00001032
158};
159
160static const u32 bonaire_mgcg_cgcg_init[] =
161{
162 0xc420, 0xffffffff, 0xfffffffc,
163 0x30800, 0xffffffff, 0xe0000000,
164 0x3c2a0, 0xffffffff, 0x00000100,
165 0x3c208, 0xffffffff, 0x00000100,
166 0x3c2c0, 0xffffffff, 0xc0000100,
167 0x3c2c8, 0xffffffff, 0xc0000100,
168 0x3c2c4, 0xffffffff, 0xc0000100,
169 0x55e4, 0xffffffff, 0x00600100,
170 0x3c280, 0xffffffff, 0x00000100,
171 0x3c214, 0xffffffff, 0x06000100,
172 0x3c220, 0xffffffff, 0x00000100,
173 0x3c218, 0xffffffff, 0x06000100,
174 0x3c204, 0xffffffff, 0x00000100,
175 0x3c2e0, 0xffffffff, 0x00000100,
176 0x3c224, 0xffffffff, 0x00000100,
177 0x3c200, 0xffffffff, 0x00000100,
178 0x3c230, 0xffffffff, 0x00000100,
179 0x3c234, 0xffffffff, 0x00000100,
180 0x3c250, 0xffffffff, 0x00000100,
181 0x3c254, 0xffffffff, 0x00000100,
182 0x3c258, 0xffffffff, 0x00000100,
183 0x3c25c, 0xffffffff, 0x00000100,
184 0x3c260, 0xffffffff, 0x00000100,
185 0x3c27c, 0xffffffff, 0x00000100,
186 0x3c278, 0xffffffff, 0x00000100,
187 0x3c210, 0xffffffff, 0x06000100,
188 0x3c290, 0xffffffff, 0x00000100,
189 0x3c274, 0xffffffff, 0x00000100,
190 0x3c2b4, 0xffffffff, 0x00000100,
191 0x3c2b0, 0xffffffff, 0x00000100,
192 0x3c270, 0xffffffff, 0x00000100,
193 0x30800, 0xffffffff, 0xe0000000,
194 0x3c020, 0xffffffff, 0x00010000,
195 0x3c024, 0xffffffff, 0x00030002,
196 0x3c028, 0xffffffff, 0x00040007,
197 0x3c02c, 0xffffffff, 0x00060005,
198 0x3c030, 0xffffffff, 0x00090008,
199 0x3c034, 0xffffffff, 0x00010000,
200 0x3c038, 0xffffffff, 0x00030002,
201 0x3c03c, 0xffffffff, 0x00040007,
202 0x3c040, 0xffffffff, 0x00060005,
203 0x3c044, 0xffffffff, 0x00090008,
204 0x3c048, 0xffffffff, 0x00010000,
205 0x3c04c, 0xffffffff, 0x00030002,
206 0x3c050, 0xffffffff, 0x00040007,
207 0x3c054, 0xffffffff, 0x00060005,
208 0x3c058, 0xffffffff, 0x00090008,
209 0x3c05c, 0xffffffff, 0x00010000,
210 0x3c060, 0xffffffff, 0x00030002,
211 0x3c064, 0xffffffff, 0x00040007,
212 0x3c068, 0xffffffff, 0x00060005,
213 0x3c06c, 0xffffffff, 0x00090008,
214 0x3c070, 0xffffffff, 0x00010000,
215 0x3c074, 0xffffffff, 0x00030002,
216 0x3c078, 0xffffffff, 0x00040007,
217 0x3c07c, 0xffffffff, 0x00060005,
218 0x3c080, 0xffffffff, 0x00090008,
219 0x3c084, 0xffffffff, 0x00010000,
220 0x3c088, 0xffffffff, 0x00030002,
221 0x3c08c, 0xffffffff, 0x00040007,
222 0x3c090, 0xffffffff, 0x00060005,
223 0x3c094, 0xffffffff, 0x00090008,
224 0x3c098, 0xffffffff, 0x00010000,
225 0x3c09c, 0xffffffff, 0x00030002,
226 0x3c0a0, 0xffffffff, 0x00040007,
227 0x3c0a4, 0xffffffff, 0x00060005,
228 0x3c0a8, 0xffffffff, 0x00090008,
229 0x3c000, 0xffffffff, 0x96e00200,
230 0x8708, 0xffffffff, 0x00900100,
231 0xc424, 0xffffffff, 0x0020003f,
232 0x38, 0xffffffff, 0x0140001c,
233 0x3c, 0x000f0000, 0x000f0000,
234 0x220, 0xffffffff, 0xC060000C,
235 0x224, 0xc0000fff, 0x00000100,
236 0xf90, 0xffffffff, 0x00000100,
237 0xf98, 0x00000101, 0x00000000,
238 0x20a8, 0xffffffff, 0x00000104,
239 0x55e4, 0xff000fff, 0x00000100,
240 0x30cc, 0xc0000fff, 0x00000104,
241 0xc1e4, 0x00000001, 0x00000001,
242 0xd00c, 0xff000ff0, 0x00000100,
243 0xd80c, 0xff000ff0, 0x00000100
244};
245
246static const u32 spectre_golden_spm_registers[] =
247{
248 0x30800, 0xe0ffffff, 0xe0000000
249};
250
251static const u32 spectre_golden_common_registers[] =
252{
253 0xc770, 0xffffffff, 0x00000800,
254 0xc774, 0xffffffff, 0x00000800,
255 0xc798, 0xffffffff, 0x00007fbf,
256 0xc79c, 0xffffffff, 0x00007faf
257};
258
259static const u32 spectre_golden_registers[] =
260{
261 0x3c000, 0xffff1fff, 0x96940200,
262 0x3c00c, 0xffff0001, 0xff000000,
263 0x3c200, 0xfffc0fff, 0x00000100,
264 0x6ed8, 0x00010101, 0x00010000,
265 0x9834, 0xf00fffff, 0x00000400,
266 0x9838, 0xfffffffc, 0x00020200,
267 0x5bb0, 0x000000f0, 0x00000070,
268 0x5bc0, 0xf0311fff, 0x80300000,
269 0x98f8, 0x73773777, 0x12010001,
270 0x9b7c, 0x00ff0000, 0x00fc0000,
271 0x2f48, 0x73773777, 0x12010001,
272 0x8a14, 0xf000003f, 0x00000007,
273 0x8b24, 0xffffffff, 0x00ffffff,
274 0x28350, 0x3f3f3fff, 0x00000082,
275 0x28355, 0x0000003f, 0x00000000,
276 0x3e78, 0x00000001, 0x00000002,
277 0x913c, 0xffff03df, 0x00000004,
278 0xc768, 0x00000008, 0x00000008,
279 0x8c00, 0x000008ff, 0x00000800,
280 0x9508, 0x00010000, 0x00010000,
281 0xac0c, 0xffffffff, 0x54763210,
282 0x214f8, 0x01ff01ff, 0x00000002,
283 0x21498, 0x007ff800, 0x00200000,
284 0x2015c, 0xffffffff, 0x00000f40,
285 0x30934, 0xffffffff, 0x00000001
286};
287
288static const u32 spectre_mgcg_cgcg_init[] =
289{
290 0xc420, 0xffffffff, 0xfffffffc,
291 0x30800, 0xffffffff, 0xe0000000,
292 0x3c2a0, 0xffffffff, 0x00000100,
293 0x3c208, 0xffffffff, 0x00000100,
294 0x3c2c0, 0xffffffff, 0x00000100,
295 0x3c2c8, 0xffffffff, 0x00000100,
296 0x3c2c4, 0xffffffff, 0x00000100,
297 0x55e4, 0xffffffff, 0x00600100,
298 0x3c280, 0xffffffff, 0x00000100,
299 0x3c214, 0xffffffff, 0x06000100,
300 0x3c220, 0xffffffff, 0x00000100,
301 0x3c218, 0xffffffff, 0x06000100,
302 0x3c204, 0xffffffff, 0x00000100,
303 0x3c2e0, 0xffffffff, 0x00000100,
304 0x3c224, 0xffffffff, 0x00000100,
305 0x3c200, 0xffffffff, 0x00000100,
306 0x3c230, 0xffffffff, 0x00000100,
307 0x3c234, 0xffffffff, 0x00000100,
308 0x3c250, 0xffffffff, 0x00000100,
309 0x3c254, 0xffffffff, 0x00000100,
310 0x3c258, 0xffffffff, 0x00000100,
311 0x3c25c, 0xffffffff, 0x00000100,
312 0x3c260, 0xffffffff, 0x00000100,
313 0x3c27c, 0xffffffff, 0x00000100,
314 0x3c278, 0xffffffff, 0x00000100,
315 0x3c210, 0xffffffff, 0x06000100,
316 0x3c290, 0xffffffff, 0x00000100,
317 0x3c274, 0xffffffff, 0x00000100,
318 0x3c2b4, 0xffffffff, 0x00000100,
319 0x3c2b0, 0xffffffff, 0x00000100,
320 0x3c270, 0xffffffff, 0x00000100,
321 0x30800, 0xffffffff, 0xe0000000,
322 0x3c020, 0xffffffff, 0x00010000,
323 0x3c024, 0xffffffff, 0x00030002,
324 0x3c028, 0xffffffff, 0x00040007,
325 0x3c02c, 0xffffffff, 0x00060005,
326 0x3c030, 0xffffffff, 0x00090008,
327 0x3c034, 0xffffffff, 0x00010000,
328 0x3c038, 0xffffffff, 0x00030002,
329 0x3c03c, 0xffffffff, 0x00040007,
330 0x3c040, 0xffffffff, 0x00060005,
331 0x3c044, 0xffffffff, 0x00090008,
332 0x3c048, 0xffffffff, 0x00010000,
333 0x3c04c, 0xffffffff, 0x00030002,
334 0x3c050, 0xffffffff, 0x00040007,
335 0x3c054, 0xffffffff, 0x00060005,
336 0x3c058, 0xffffffff, 0x00090008,
337 0x3c05c, 0xffffffff, 0x00010000,
338 0x3c060, 0xffffffff, 0x00030002,
339 0x3c064, 0xffffffff, 0x00040007,
340 0x3c068, 0xffffffff, 0x00060005,
341 0x3c06c, 0xffffffff, 0x00090008,
342 0x3c070, 0xffffffff, 0x00010000,
343 0x3c074, 0xffffffff, 0x00030002,
344 0x3c078, 0xffffffff, 0x00040007,
345 0x3c07c, 0xffffffff, 0x00060005,
346 0x3c080, 0xffffffff, 0x00090008,
347 0x3c084, 0xffffffff, 0x00010000,
348 0x3c088, 0xffffffff, 0x00030002,
349 0x3c08c, 0xffffffff, 0x00040007,
350 0x3c090, 0xffffffff, 0x00060005,
351 0x3c094, 0xffffffff, 0x00090008,
352 0x3c098, 0xffffffff, 0x00010000,
353 0x3c09c, 0xffffffff, 0x00030002,
354 0x3c0a0, 0xffffffff, 0x00040007,
355 0x3c0a4, 0xffffffff, 0x00060005,
356 0x3c0a8, 0xffffffff, 0x00090008,
357 0x3c0ac, 0xffffffff, 0x00010000,
358 0x3c0b0, 0xffffffff, 0x00030002,
359 0x3c0b4, 0xffffffff, 0x00040007,
360 0x3c0b8, 0xffffffff, 0x00060005,
361 0x3c0bc, 0xffffffff, 0x00090008,
362 0x3c000, 0xffffffff, 0x96e00200,
363 0x8708, 0xffffffff, 0x00900100,
364 0xc424, 0xffffffff, 0x0020003f,
365 0x38, 0xffffffff, 0x0140001c,
366 0x3c, 0x000f0000, 0x000f0000,
367 0x220, 0xffffffff, 0xC060000C,
368 0x224, 0xc0000fff, 0x00000100,
369 0xf90, 0xffffffff, 0x00000100,
370 0xf98, 0x00000101, 0x00000000,
371 0x20a8, 0xffffffff, 0x00000104,
372 0x55e4, 0xff000fff, 0x00000100,
373 0x30cc, 0xc0000fff, 0x00000104,
374 0xc1e4, 0x00000001, 0x00000001,
375 0xd00c, 0xff000ff0, 0x00000100,
376 0xd80c, 0xff000ff0, 0x00000100
377};
378
379static const u32 kalindi_golden_spm_registers[] =
380{
381 0x30800, 0xe0ffffff, 0xe0000000
382};
383
384static const u32 kalindi_golden_common_registers[] =
385{
386 0xc770, 0xffffffff, 0x00000800,
387 0xc774, 0xffffffff, 0x00000800,
388 0xc798, 0xffffffff, 0x00007fbf,
389 0xc79c, 0xffffffff, 0x00007faf
390};
391
392static const u32 kalindi_golden_registers[] =
393{
394 0x3c000, 0xffffdfff, 0x6e944040,
395 0x55e4, 0xff607fff, 0xfc000100,
396 0x3c220, 0xff000fff, 0x00000100,
397 0x3c224, 0xff000fff, 0x00000100,
398 0x3c200, 0xfffc0fff, 0x00000100,
399 0x6ed8, 0x00010101, 0x00010000,
400 0x9830, 0xffffffff, 0x00000000,
401 0x9834, 0xf00fffff, 0x00000400,
402 0x5bb0, 0x000000f0, 0x00000070,
403 0x5bc0, 0xf0311fff, 0x80300000,
404 0x98f8, 0x73773777, 0x12010001,
405 0x98fc, 0xffffffff, 0x00000010,
406 0x9b7c, 0x00ff0000, 0x00fc0000,
407 0x8030, 0x00001f0f, 0x0000100a,
408 0x2f48, 0x73773777, 0x12010001,
409 0x2408, 0x000fffff, 0x000c007f,
410 0x8a14, 0xf000003f, 0x00000007,
411 0x8b24, 0x3fff3fff, 0x00ffcfff,
412 0x30a04, 0x0000ff0f, 0x00000000,
413 0x28a4c, 0x07ffffff, 0x06000000,
414 0x4d8, 0x00000fff, 0x00000100,
415 0x3e78, 0x00000001, 0x00000002,
416 0xc768, 0x00000008, 0x00000008,
417 0x8c00, 0x000000ff, 0x00000003,
418 0x214f8, 0x01ff01ff, 0x00000002,
419 0x21498, 0x007ff800, 0x00200000,
420 0x2015c, 0xffffffff, 0x00000f40,
421 0x88c4, 0x001f3ae3, 0x00000082,
422 0x88d4, 0x0000001f, 0x00000010,
423 0x30934, 0xffffffff, 0x00000000
424};
425
426static const u32 kalindi_mgcg_cgcg_init[] =
427{
428 0xc420, 0xffffffff, 0xfffffffc,
429 0x30800, 0xffffffff, 0xe0000000,
430 0x3c2a0, 0xffffffff, 0x00000100,
431 0x3c208, 0xffffffff, 0x00000100,
432 0x3c2c0, 0xffffffff, 0x00000100,
433 0x3c2c8, 0xffffffff, 0x00000100,
434 0x3c2c4, 0xffffffff, 0x00000100,
435 0x55e4, 0xffffffff, 0x00600100,
436 0x3c280, 0xffffffff, 0x00000100,
437 0x3c214, 0xffffffff, 0x06000100,
438 0x3c220, 0xffffffff, 0x00000100,
439 0x3c218, 0xffffffff, 0x06000100,
440 0x3c204, 0xffffffff, 0x00000100,
441 0x3c2e0, 0xffffffff, 0x00000100,
442 0x3c224, 0xffffffff, 0x00000100,
443 0x3c200, 0xffffffff, 0x00000100,
444 0x3c230, 0xffffffff, 0x00000100,
445 0x3c234, 0xffffffff, 0x00000100,
446 0x3c250, 0xffffffff, 0x00000100,
447 0x3c254, 0xffffffff, 0x00000100,
448 0x3c258, 0xffffffff, 0x00000100,
449 0x3c25c, 0xffffffff, 0x00000100,
450 0x3c260, 0xffffffff, 0x00000100,
451 0x3c27c, 0xffffffff, 0x00000100,
452 0x3c278, 0xffffffff, 0x00000100,
453 0x3c210, 0xffffffff, 0x06000100,
454 0x3c290, 0xffffffff, 0x00000100,
455 0x3c274, 0xffffffff, 0x00000100,
456 0x3c2b4, 0xffffffff, 0x00000100,
457 0x3c2b0, 0xffffffff, 0x00000100,
458 0x3c270, 0xffffffff, 0x00000100,
459 0x30800, 0xffffffff, 0xe0000000,
460 0x3c020, 0xffffffff, 0x00010000,
461 0x3c024, 0xffffffff, 0x00030002,
462 0x3c028, 0xffffffff, 0x00040007,
463 0x3c02c, 0xffffffff, 0x00060005,
464 0x3c030, 0xffffffff, 0x00090008,
465 0x3c034, 0xffffffff, 0x00010000,
466 0x3c038, 0xffffffff, 0x00030002,
467 0x3c03c, 0xffffffff, 0x00040007,
468 0x3c040, 0xffffffff, 0x00060005,
469 0x3c044, 0xffffffff, 0x00090008,
470 0x3c000, 0xffffffff, 0x96e00200,
471 0x8708, 0xffffffff, 0x00900100,
472 0xc424, 0xffffffff, 0x0020003f,
473 0x38, 0xffffffff, 0x0140001c,
474 0x3c, 0x000f0000, 0x000f0000,
475 0x220, 0xffffffff, 0xC060000C,
476 0x224, 0xc0000fff, 0x00000100,
477 0x20a8, 0xffffffff, 0x00000104,
478 0x55e4, 0xff000fff, 0x00000100,
479 0x30cc, 0xc0000fff, 0x00000104,
480 0xc1e4, 0x00000001, 0x00000001,
481 0xd00c, 0xff000ff0, 0x00000100,
482 0xd80c, 0xff000ff0, 0x00000100
483};
484
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

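/*
 * Usage sketch (illustrative, not taken from this file): a ring backend that
 * has been assigned a doorbell offset would publish a new write pointer with
 * something like
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 *
 * where ring->doorbell_offset and ring->wptr are assumed fields on the
 * caller's ring structure.
 */
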
#define BONAIRE_IO_MC_REGS_SIZE 36

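/*
 * Each bonaire_io_mc_regs entry is a {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pair; ci_mc_load_microcode() programs these before uploading the MC ucode.
 */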
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

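/*
 * Typical call pattern (sketch): per-instance register setup is bracketed by
 * a select of the target me/pipe/queue and a restore of the default instance:
 *
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the instanced registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 */
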
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes. Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

939 if (num_pipe_configs == 8) {
940 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
941 switch (reg_offset) {
942 case 0:
943 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
944 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
945 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
946 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
947 break;
948 case 1:
949 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
950 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
951 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
952 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
953 break;
954 case 2:
955 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
956 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
957 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
958 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
959 break;
960 case 3:
961 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
962 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
963 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
965 break;
966 case 4:
967 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
968 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
970 TILE_SPLIT(split_equal_to_row_size));
971 break;
972 case 5:
973 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
974 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
975 break;
976 case 6:
977 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
981 break;
982 case 7:
983 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
984 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
985 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
986 TILE_SPLIT(split_equal_to_row_size));
987 break;
988 case 8:
989 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
990 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
991 break;
992 case 9:
993 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
994 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
995 break;
996 case 10:
997 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
998 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1001 break;
1002 case 11:
1003 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1004 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1005 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1007 break;
1008 case 12:
1009 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1010 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1013 break;
1014 case 13:
1015 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1016 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1017 break;
1018 case 14:
1019 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1020 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1022 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1023 break;
1024 case 16:
1025 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1026 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1027 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1029 break;
1030 case 17:
1031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1032 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1033 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1035 break;
1036 case 27:
1037 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1038 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1039 break;
1040 case 28:
1041 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1042 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1043 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1044 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1045 break;
1046 case 29:
1047 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1048 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1049 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1051 break;
1052 case 30:
1053 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1054 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1055 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1057 break;
1058 default:
1059 gb_tile_moden = 0;
1060 break;
1061 }
1062 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1063 }
1064 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1065 switch (reg_offset) {
1066 case 0:
1067 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1068 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1069 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1070 NUM_BANKS(ADDR_SURF_16_BANK));
1071 break;
1072 case 1:
1073 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1074 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1075 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1076 NUM_BANKS(ADDR_SURF_16_BANK));
1077 break;
1078 case 2:
1079 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1080 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1081 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1082 NUM_BANKS(ADDR_SURF_16_BANK));
1083 break;
1084 case 3:
1085 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1086 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1087 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1088 NUM_BANKS(ADDR_SURF_16_BANK));
1089 break;
1090 case 4:
1091 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1092 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1093 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1094 NUM_BANKS(ADDR_SURF_8_BANK));
1095 break;
1096 case 5:
1097 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1098 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1099 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1100 NUM_BANKS(ADDR_SURF_4_BANK));
1101 break;
1102 case 6:
1103 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1104 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1105 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1106 NUM_BANKS(ADDR_SURF_2_BANK));
1107 break;
1108 case 8:
1109 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1110 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1111 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1112 NUM_BANKS(ADDR_SURF_16_BANK));
1113 break;
1114 case 9:
1115 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1116 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1117 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1118 NUM_BANKS(ADDR_SURF_16_BANK));
1119 break;
1120 case 10:
1121 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1122 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1123 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1124 NUM_BANKS(ADDR_SURF_16_BANK));
1125 break;
1126 case 11:
1127 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1128 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1129 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1130 NUM_BANKS(ADDR_SURF_16_BANK));
1131 break;
1132 case 12:
1133 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1134 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1135 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1136 NUM_BANKS(ADDR_SURF_8_BANK));
1137 break;
1138 case 13:
1139 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1140 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1141 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1142 NUM_BANKS(ADDR_SURF_4_BANK));
1143 break;
1144 case 14:
1145 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1146 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1147 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1148 NUM_BANKS(ADDR_SURF_2_BANK));
1149 break;
1150 default:
1151 gb_tile_moden = 0;
1152 break;
1153 }
1154 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1155 }
1156 } else if (num_pipe_configs == 4) {
1157 if (num_rbs == 4) {
1158 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1159 switch (reg_offset) {
1160 case 0:
1161 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1162 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1163 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1164 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1165 break;
1166 case 1:
1167 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1168 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1169 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1170 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1171 break;
1172 case 2:
1173 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1174 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1175 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1176 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1177 break;
1178 case 3:
1179 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1181 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1182 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1183 break;
1184 case 4:
1185 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1186 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1187 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1188 TILE_SPLIT(split_equal_to_row_size));
1189 break;
1190 case 5:
1191 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1192 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1193 break;
1194 case 6:
1195 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1196 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1197 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1198 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1199 break;
1200 case 7:
1201 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1202 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1203 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1204 TILE_SPLIT(split_equal_to_row_size));
1205 break;
1206 case 8:
1207 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1208 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1209 break;
1210 case 9:
1211 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1212 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1213 break;
1214 case 10:
1215 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1216 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1217 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1219 break;
1220 case 11:
1221 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1222 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1223 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1225 break;
1226 case 12:
1227 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1228 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1229 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1231 break;
1232 case 13:
1233 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1234 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1235 break;
1236 case 14:
1237 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1238 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1239 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1241 break;
1242 case 16:
1243 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1244 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1245 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1247 break;
1248 case 17:
1249 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1250 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1253 break;
1254 case 27:
1255 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1256 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1257 break;
1258 case 28:
1259 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1260 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1261 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1263 break;
1264 case 29:
1265 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1266 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1267 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1269 break;
1270 case 30:
1271 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1272 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1273 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1275 break;
1276 default:
1277 gb_tile_moden = 0;
1278 break;
1279 }
1280 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1281 }
1282 } else if (num_rbs < 4) {
1283 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1284 switch (reg_offset) {
1285 case 0:
1286 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1287 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1288 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1289 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1290 break;
1291 case 1:
1292 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1293 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1294 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1295 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1296 break;
1297 case 2:
1298 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1299 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1300 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1301 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1302 break;
1303 case 3:
1304 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1305 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1306 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1307 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1308 break;
1309 case 4:
1310 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1311 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1312 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1313 TILE_SPLIT(split_equal_to_row_size));
1314 break;
1315 case 5:
1316 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1317 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1318 break;
1319 case 6:
1320 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1321 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1322 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1323 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1324 break;
1325 case 7:
1326 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1327 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1328 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1329 TILE_SPLIT(split_equal_to_row_size));
1330 break;
1331 case 8:
1332 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1333 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1334 break;
1335 case 9:
1336 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1337 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1338 break;
1339 case 10:
1340 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1341 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1342 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1344 break;
1345 case 11:
1346 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1347 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1348 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1350 break;
1351 case 12:
1352 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1353 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1354 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1356 break;
1357 case 13:
1358 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1359 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1360 break;
1361 case 14:
1362 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1363 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1364 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1366 break;
1367 case 16:
1368 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1369 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1370 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1372 break;
1373 case 17:
1374 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1375 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1376 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1378 break;
1379 case 27:
1380 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1381 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1382 break;
1383 case 28:
1384 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1385 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1386 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1388 break;
1389 case 29:
1390 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1391 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1392 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1394 break;
1395 case 30:
1396 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1397 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1398 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1400 break;
1401 default:
1402 gb_tile_moden = 0;
1403 break;
1404 }
1405 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1406 }
1407 }
1408 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1409 switch (reg_offset) {
1410 case 0:
1411 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1412 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1413 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1414 NUM_BANKS(ADDR_SURF_16_BANK));
1415 break;
1416 case 1:
1417 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1418 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1419 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1420 NUM_BANKS(ADDR_SURF_16_BANK));
1421 break;
1422 case 2:
1423 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1424 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1425 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1426 NUM_BANKS(ADDR_SURF_16_BANK));
1427 break;
1428 case 3:
1429 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1430 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1431 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1432 NUM_BANKS(ADDR_SURF_16_BANK));
1433 break;
1434 case 4:
1435 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1436 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1437 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1438 NUM_BANKS(ADDR_SURF_16_BANK));
1439 break;
1440 case 5:
1441 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1442 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1443 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1444 NUM_BANKS(ADDR_SURF_8_BANK));
1445 break;
1446 case 6:
1447 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1448 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1449 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1450 NUM_BANKS(ADDR_SURF_4_BANK));
1451 break;
1452 case 8:
1453 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1454 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1455 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1456 NUM_BANKS(ADDR_SURF_16_BANK));
1457 break;
1458 case 9:
1459 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1460 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1461 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1462 NUM_BANKS(ADDR_SURF_16_BANK));
1463 break;
1464 case 10:
1465 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1466 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1467 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1468 NUM_BANKS(ADDR_SURF_16_BANK));
1469 break;
1470 case 11:
1471 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1474 NUM_BANKS(ADDR_SURF_16_BANK));
1475 break;
1476 case 12:
1477 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1480 NUM_BANKS(ADDR_SURF_16_BANK));
1481 break;
1482 case 13:
1483 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1486 NUM_BANKS(ADDR_SURF_8_BANK));
1487 break;
1488 case 14:
1489 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1492 NUM_BANKS(ADDR_SURF_4_BANK));
1493 break;
1494 default:
1495 gb_tile_moden = 0;
1496 break;
1497 }
1498 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1499 }
1500 } else if (num_pipe_configs == 2) {
1501 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1502 switch (reg_offset) {
1503 case 0:
1504 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1505 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1506 PIPE_CONFIG(ADDR_SURF_P2) |
1507 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1508 break;
1509 case 1:
1510 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1512 PIPE_CONFIG(ADDR_SURF_P2) |
1513 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1514 break;
1515 case 2:
1516 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1518 PIPE_CONFIG(ADDR_SURF_P2) |
1519 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1520 break;
1521 case 3:
1522 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1524 PIPE_CONFIG(ADDR_SURF_P2) |
1525 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1526 break;
1527 case 4:
1528 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1529 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1530 PIPE_CONFIG(ADDR_SURF_P2) |
1531 TILE_SPLIT(split_equal_to_row_size));
1532 break;
1533 case 5:
1534 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1536 break;
1537 case 6:
1538 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1539 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1540 PIPE_CONFIG(ADDR_SURF_P2) |
1541 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1542 break;
1543 case 7:
1544 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1545 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1546 PIPE_CONFIG(ADDR_SURF_P2) |
1547 TILE_SPLIT(split_equal_to_row_size));
1548 break;
1549 case 8:
1550 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1551 break;
1552 case 9:
1553 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1554 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1555 break;
1556 case 10:
1557 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1558 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1559 PIPE_CONFIG(ADDR_SURF_P2) |
1560 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1561 break;
1562 case 11:
1563 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1564 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1565 PIPE_CONFIG(ADDR_SURF_P2) |
1566 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1567 break;
1568 case 12:
1569 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1570 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1571 PIPE_CONFIG(ADDR_SURF_P2) |
1572 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1573 break;
1574 case 13:
1575 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1576 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1577 break;
1578 case 14:
1579 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1580 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1581 PIPE_CONFIG(ADDR_SURF_P2) |
1582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1583 break;
1584 case 16:
1585 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1586 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1587 PIPE_CONFIG(ADDR_SURF_P2) |
1588 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1589 break;
1590 case 17:
1591 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1592 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1593 PIPE_CONFIG(ADDR_SURF_P2) |
1594 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1595 break;
1596 case 27:
1597 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1598 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1599 break;
1600 case 28:
1601 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1602 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1603 PIPE_CONFIG(ADDR_SURF_P2) |
1604 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1605 break;
1606 case 29:
1607 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1608 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1609 PIPE_CONFIG(ADDR_SURF_P2) |
1610 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1611 break;
1612 case 30:
1613 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1614 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1615 PIPE_CONFIG(ADDR_SURF_P2) |
1616 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1617 break;
1618 default:
1619 gb_tile_moden = 0;
1620 break;
1621 }
1622 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1623 }
1624 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1625 switch (reg_offset) {
1626 case 0:
1627 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1628 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1629 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1630 NUM_BANKS(ADDR_SURF_16_BANK));
1631 break;
1632 case 1:
1633 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1636 NUM_BANKS(ADDR_SURF_16_BANK));
1637 break;
1638 case 2:
1639 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1642 NUM_BANKS(ADDR_SURF_16_BANK));
1643 break;
1644 case 3:
1645 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1648 NUM_BANKS(ADDR_SURF_16_BANK));
1649 break;
1650 case 4:
1651 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1652 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1653 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1654 NUM_BANKS(ADDR_SURF_16_BANK));
1655 break;
1656 case 5:
1657 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1658 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1659 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1660 NUM_BANKS(ADDR_SURF_16_BANK));
1661 break;
1662 case 6:
1663 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1664 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1665 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1666 NUM_BANKS(ADDR_SURF_8_BANK));
1667 break;
1668 case 8:
1669 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1670 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1671 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1672 NUM_BANKS(ADDR_SURF_16_BANK));
1673 break;
1674 case 9:
1675 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1676 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1677 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1678 NUM_BANKS(ADDR_SURF_16_BANK));
1679 break;
1680 case 10:
1681 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1682 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1683 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1684 NUM_BANKS(ADDR_SURF_16_BANK));
1685 break;
1686 case 11:
1687 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1688 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1689 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1690 NUM_BANKS(ADDR_SURF_16_BANK));
1691 break;
1692 case 12:
1693 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1694 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1695 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1696 NUM_BANKS(ADDR_SURF_16_BANK));
1697 break;
1698 case 13:
1699 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1700 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1701 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1702 NUM_BANKS(ADDR_SURF_16_BANK));
1703 break;
1704 case 14:
1705 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1706 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1707 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1708 NUM_BANKS(ADDR_SURF_8_BANK));
1709 break;
1710 default:
1711 gb_tile_moden = 0;
1712 break;
1713 }
1714 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1715 }
1716 } else
1717 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1718}
1719
1720/**
1721 * cik_select_se_sh - select which SE, SH to address
1722 *
1723 * @rdev: radeon_device pointer
1724 * @se_num: shader engine to address
1725 * @sh_num: sh block to address
1726 *
1727 * Select which SE, SH combinations to address. Certain
1728 * registers are instanced per SE or SH. 0xffffffff means
1729 * broadcast to all SEs or SHs (CIK).
1730 */
1731static void cik_select_se_sh(struct radeon_device *rdev,
1732 u32 se_num, u32 sh_num)
1733{
1734 u32 data = INSTANCE_BROADCAST_WRITES;
1735
1736 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1737 data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1738 else if (se_num == 0xffffffff)
1739 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1740 else if (sh_num == 0xffffffff)
1741 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1742 else
1743 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1744 WREG32(GRBM_GFX_INDEX, data);
1745}
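/*
 * Usage sketch (illustrative only, mirroring how cik_setup_rb() below
 * uses this helper): select one SE/SH instance, access the instanced
 * registers, then restore broadcast mode so later writes hit every
 * instance again:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	... read/write per-SE/SH instanced registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */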
1746
1747/**
1748 * cik_create_bitmask - create a bitmask
1749 *
1750 * @bit_width: length of the mask
1751 *
1752 * create a variable length bit mask (CIK).
1753 * Returns the bitmask.
1754 */
1755static u32 cik_create_bitmask(u32 bit_width)
1756{
1757 u32 i, mask = 0;
1758
1759 for (i = 0; i < bit_width; i++) {
1760 mask <<= 1;
1761 mask |= 1;
1762 }
1763 return mask;
1764}
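/*
 * Illustrative sketch only (not used by the driver): for bit_width < 32
 * the loop above reduces to the closed form (1 << bit_width) - 1, e.g.
 * bit_width = 4 gives 0xf.  The loop form is kept because it also
 * returns 0xffffffff for bit_width = 32 without relying on an undefined
 * 32-bit shift.
 */
static inline u32 cik_create_bitmask_closed_form(u32 bit_width)
{
	return (bit_width >= 32) ? 0xffffffff : ((1u << bit_width) - 1);
}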
1765
1766/**
1767 * cik_get_rb_disabled - compute the mask of disabled render backends
1768 *
1769 * @rdev: radeon_device pointer
1770 * @max_rb_num: max RBs (render backends) for the asic
1771 * @se_num: number of SEs (shader engines) for the asic
1772 * @sh_per_se: number of SH blocks per SE for the asic
1773 *
1774 * Calculates the bitmask of disabled RBs (CIK).
1775 * Returns the disabled RB bitmask.
1776 */
1777static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1778 u32 max_rb_num, u32 se_num,
1779 u32 sh_per_se)
1780{
1781 u32 data, mask;
1782
1783 data = RREG32(CC_RB_BACKEND_DISABLE);
1784 if (data & 1)
1785 data &= BACKEND_DISABLE_MASK;
1786 else
1787 data = 0;
1788 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1789
1790 data >>= BACKEND_DISABLE_SHIFT;
1791
1792 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1793
1794 return data & mask;
1795}
1796
1797/**
1798 * cik_setup_rb - setup the RBs on the asic
1799 *
1800 * @rdev: radeon_device pointer
1801 * @se_num: number of SEs (shader engines) for the asic
1802 * @sh_per_se: number of SH blocks per SE for the asic
1803 * @max_rb_num: max RBs (render backends) for the asic
1804 *
1805 * Configures per-SE/SH RB registers (CIK).
1806 */
1807static void cik_setup_rb(struct radeon_device *rdev,
1808 u32 se_num, u32 sh_per_se,
1809 u32 max_rb_num)
1810{
1811 int i, j;
1812 u32 data, mask;
1813 u32 disabled_rbs = 0;
1814 u32 enabled_rbs = 0;
1815
1816 for (i = 0; i < se_num; i++) {
1817 for (j = 0; j < sh_per_se; j++) {
1818 cik_select_se_sh(rdev, i, j);
1819 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1820 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1821 }
1822 }
1823 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1824
1825 mask = 1;
1826 for (i = 0; i < max_rb_num; i++) {
1827 if (!(disabled_rbs & mask))
1828 enabled_rbs |= mask;
1829 mask <<= 1;
1830 }
1831
1832 for (i = 0; i < se_num; i++) {
1833 cik_select_se_sh(rdev, i, 0xffffffff);
1834 data = 0;
1835 for (j = 0; j < sh_per_se; j++) {
1836 switch (enabled_rbs & 3) {
1837 case 1:
1838 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1839 break;
1840 case 2:
1841 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1842 break;
1843 case 3:
1844 default:
1845 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1846 break;
1847 }
1848 enabled_rbs >>= 2;
1849 }
1850 WREG32(PA_SC_RASTER_CONFIG, data);
1851 }
1852 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1853}
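/*
 * Worked example (sketch): with se_num = 2, sh_per_se = 1 and
 * max_rb_num = 4, cik_get_rb_disabled() returns a 2-bit mask per SH
 * (max_rb_num / se_num / sh_per_se = 2).  SE0's bits land at offset 0
 * and SE1's at offset CIK_RB_BITMAP_WIDTH_PER_SH in disabled_rbs;
 * enabled_rbs is the complement of that bitmap over max_rb_num bits and
 * is consumed two bits at a time when PA_SC_RASTER_CONFIG is programmed
 * above.
 */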
1854
1855/**
1856 * cik_gpu_init - setup the 3D engine
1857 *
1858 * @rdev: radeon_device pointer
1859 *
1860 * Configures the 3D engine and tiling configuration
1861 * registers so that the 3D engine is usable.
1862 */
1863static void cik_gpu_init(struct radeon_device *rdev)
1864{
1865 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1866 u32 mc_shared_chmap, mc_arb_ramcfg;
1867 u32 hdp_host_path_cntl;
1868 u32 tmp;
1869 int i, j;
1870
1871 switch (rdev->family) {
1872 case CHIP_BONAIRE:
1873 rdev->config.cik.max_shader_engines = 2;
1874 rdev->config.cik.max_tile_pipes = 4;
1875 rdev->config.cik.max_cu_per_sh = 7;
1876 rdev->config.cik.max_sh_per_se = 1;
1877 rdev->config.cik.max_backends_per_se = 2;
1878 rdev->config.cik.max_texture_channel_caches = 4;
1879 rdev->config.cik.max_gprs = 256;
1880 rdev->config.cik.max_gs_threads = 32;
1881 rdev->config.cik.max_hw_contexts = 8;
1882
1883 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1884 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1885 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1886 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1887 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1888 break;
1889 case CHIP_KAVERI:
1890 /* TODO */
1891 break;
1892 case CHIP_KABINI:
1893 default:
1894 rdev->config.cik.max_shader_engines = 1;
1895 rdev->config.cik.max_tile_pipes = 2;
1896 rdev->config.cik.max_cu_per_sh = 2;
1897 rdev->config.cik.max_sh_per_se = 1;
1898 rdev->config.cik.max_backends_per_se = 1;
1899 rdev->config.cik.max_texture_channel_caches = 2;
1900 rdev->config.cik.max_gprs = 256;
1901 rdev->config.cik.max_gs_threads = 16;
1902 rdev->config.cik.max_hw_contexts = 8;
1903
1904 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1905 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1906 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1907 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1908 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1909 break;
1910 }
1911
1912 /* Initialize HDP */
1913 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1914 WREG32((0x2c14 + j), 0x00000000);
1915 WREG32((0x2c18 + j), 0x00000000);
1916 WREG32((0x2c1c + j), 0x00000000);
1917 WREG32((0x2c20 + j), 0x00000000);
1918 WREG32((0x2c24 + j), 0x00000000);
1919 }
1920
1921 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1922
1923 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1924
1925 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1926 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1927
1928 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1929 rdev->config.cik.mem_max_burst_length_bytes = 256;
1930 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1931 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1932 if (rdev->config.cik.mem_row_size_in_kb > 4)
1933 rdev->config.cik.mem_row_size_in_kb = 4;
1934 /* XXX use MC settings? */
1935 rdev->config.cik.shader_engine_tile_size = 32;
1936 rdev->config.cik.num_gpus = 1;
1937 rdev->config.cik.multi_gpu_tile_size = 64;
1938
1939 /* fix up row size */
1940 gb_addr_config &= ~ROW_SIZE_MASK;
1941 switch (rdev->config.cik.mem_row_size_in_kb) {
1942 case 1:
1943 default:
1944 gb_addr_config |= ROW_SIZE(0);
1945 break;
1946 case 2:
1947 gb_addr_config |= ROW_SIZE(1);
1948 break;
1949 case 4:
1950 gb_addr_config |= ROW_SIZE(2);
1951 break;
1952 }
1953
1954 /* setup tiling info dword. gb_addr_config is not adequate since it does
1955 * not have bank info, so create a custom tiling dword.
1956 * bits 3:0 num_pipes
1957 * bits 7:4 num_banks
1958 * bits 11:8 group_size
1959 * bits 15:12 row_size
1960 */
1961 rdev->config.cik.tile_config = 0;
1962 switch (rdev->config.cik.num_tile_pipes) {
1963 case 1:
1964 rdev->config.cik.tile_config |= (0 << 0);
1965 break;
1966 case 2:
1967 rdev->config.cik.tile_config |= (1 << 0);
1968 break;
1969 case 4:
1970 rdev->config.cik.tile_config |= (2 << 0);
1971 break;
1972 case 8:
1973 default:
1974 /* XXX what about 12? */
1975 rdev->config.cik.tile_config |= (3 << 0);
1976 break;
1977 }
1978 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1979 rdev->config.cik.tile_config |= 1 << 4;
1980 else
1981 rdev->config.cik.tile_config |= 0 << 4;
1982 rdev->config.cik.tile_config |=
1983 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1984 rdev->config.cik.tile_config |=
1985 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1986
1987 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1988 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1989 WREG32(DMIF_ADDR_CALC, gb_addr_config);
Alex Deucher21a93e12013-04-09 12:47:11 -04001990 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1991 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
Christian König87167bb2013-04-09 13:39:21 -04001992 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1993 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1994 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
Alex Deucher8cc1a532013-04-09 12:41:24 -04001995
1996 cik_tiling_mode_table_init(rdev);
1997
1998 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1999 rdev->config.cik.max_sh_per_se,
2000 rdev->config.cik.max_backends_per_se);
2001
2002 /* set HW defaults for 3D engine */
2003 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2004
2005 WREG32(SX_DEBUG_1, 0x20);
2006
2007 WREG32(TA_CNTL_AUX, 0x00010000);
2008
2009 tmp = RREG32(SPI_CONFIG_CNTL);
2010 tmp |= 0x03000000;
2011 WREG32(SPI_CONFIG_CNTL, tmp);
2012
2013 WREG32(SQ_CONFIG, 1);
2014
2015 WREG32(DB_DEBUG, 0);
2016
2017 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2018 tmp |= 0x00000400;
2019 WREG32(DB_DEBUG2, tmp);
2020
2021 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2022 tmp |= 0x00020200;
2023 WREG32(DB_DEBUG3, tmp);
2024
2025 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2026 tmp |= 0x00018208;
2027 WREG32(CB_HW_CONTROL, tmp);
2028
2029 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2030
2031 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2032 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2033 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2034 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2035
2036 WREG32(VGT_NUM_INSTANCES, 1);
2037
2038 WREG32(CP_PERFMON_CNTL, 0);
2039
2040 WREG32(SQ_CONFIG, 0);
2041
2042 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2043 FORCE_EOV_MAX_REZ_CNT(255)));
2044
2045 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2046 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2047
2048 WREG32(VGT_GS_VERTEX_REUSE, 16);
2049 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2050
2051 tmp = RREG32(HDP_MISC_CNTL);
2052 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2053 WREG32(HDP_MISC_CNTL, tmp);
2054
2055 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2056 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2057
2058 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2059 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2060
2061 udelay(50);
2062}
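/*
 * Illustrative sketch only: unpacking the custom tiling dword built in
 * cik_gpu_init() above.  The field layout follows the comment in that
 * function (bits 3:0 pipe code, 7:4 bank flag, 11:8 group size code,
 * 15:12 row size code); the values are the encoded codes, not raw
 * counts, and this hypothetical helper is not used by the driver.
 */
static inline void cik_decode_tile_config(u32 tile_config, u32 *pipe_code,
					  u32 *bank_code, u32 *group_code,
					  u32 *row_code)
{
	*pipe_code = (tile_config >> 0) & 0xf;
	*bank_code = (tile_config >> 4) & 0xf;
	*group_code = (tile_config >> 8) & 0xf;
	*row_code = (tile_config >> 12) & 0xf;
}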
2063
Alex Deucher841cf442012-12-18 21:47:44 -05002064/*
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002065 * GPU scratch registers helpers function.
2066 */
2067/**
2068 * cik_scratch_init - setup driver info for CP scratch regs
2069 *
2070 * @rdev: radeon_device pointer
2071 *
2072 * Set up the number and offset of the CP scratch registers.
2073 * NOTE: use of CP scratch registers is a legacy interface and
2074 * is not used by default on newer asics (r6xx+). On newer asics,
2075 * memory buffers are used for fences rather than scratch regs.
2076 */
2077static void cik_scratch_init(struct radeon_device *rdev)
2078{
2079 int i;
2080
2081 rdev->scratch.num_reg = 7;
2082 rdev->scratch.reg_base = SCRATCH_REG0;
2083 for (i = 0; i < rdev->scratch.num_reg; i++) {
2084 rdev->scratch.free[i] = true;
2085 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2086 }
2087}
2088
2089/**
Alex Deucherfbc832c2012-07-20 14:41:35 -04002090 * cik_ring_test - basic gfx ring test
2091 *
2092 * @rdev: radeon_device pointer
2093 * @ring: radeon_ring structure holding ring information
2094 *
2095 * Allocate a scratch register and write to it using the gfx ring (CIK).
2096 * Provides a basic gfx ring test to verify that the ring is working.
2097 * Used by cik_cp_gfx_resume();
2098 * Returns 0 on success, error on failure.
2099 */
2100int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2101{
2102 uint32_t scratch;
2103 uint32_t tmp = 0;
2104 unsigned i;
2105 int r;
2106
2107 r = radeon_scratch_get(rdev, &scratch);
2108 if (r) {
2109 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2110 return r;
2111 }
2112 WREG32(scratch, 0xCAFEDEAD);
2113 r = radeon_ring_lock(rdev, ring, 3);
2114 if (r) {
2115 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2116 radeon_scratch_free(rdev, scratch);
2117 return r;
2118 }
2119 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2120 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2121 radeon_ring_write(ring, 0xDEADBEEF);
2122 radeon_ring_unlock_commit(rdev, ring);
Alex Deucher963e81f2013-06-26 17:37:11 -04002123
Alex Deucherfbc832c2012-07-20 14:41:35 -04002124 for (i = 0; i < rdev->usec_timeout; i++) {
2125 tmp = RREG32(scratch);
2126 if (tmp == 0xDEADBEEF)
2127 break;
2128 DRM_UDELAY(1);
2129 }
2130 if (i < rdev->usec_timeout) {
2131 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2132 } else {
2133 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2134 ring->idx, scratch, tmp);
2135 r = -EINVAL;
2136 }
2137 radeon_scratch_free(rdev, scratch);
2138 return r;
2139}
2140
2141/**
Alex Deucherb07fdd32013-04-11 09:36:17 -04002142 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002143 *
2144 * @rdev: radeon_device pointer
2145 * @fence: radeon fence object
2146 *
2147 * Emits a fence sequence number on the gfx ring and flushes
2148 * GPU caches.
2149 */
Alex Deucherb07fdd32013-04-11 09:36:17 -04002150void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2151 struct radeon_fence *fence)
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002152{
2153 struct radeon_ring *ring = &rdev->ring[fence->ring];
2154 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2155
2156 /* EVENT_WRITE_EOP - flush caches, send int */
2157 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2158 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2159 EOP_TC_ACTION_EN |
2160 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2161 EVENT_INDEX(5)));
2162 radeon_ring_write(ring, addr & 0xfffffffc);
2163 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2164 radeon_ring_write(ring, fence->seq);
2165 radeon_ring_write(ring, 0);
2166 /* HDP flush */
2167 /* We should be using the new WAIT_REG_MEM special op packet here
2168 * but it causes the CP to hang
2169 */
2170 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2171 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2172 WRITE_DATA_DST_SEL(0)));
2173 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2174 radeon_ring_write(ring, 0);
2175 radeon_ring_write(ring, 0);
2176}
2177
Alex Deucherb07fdd32013-04-11 09:36:17 -04002178/**
2179 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2180 *
2181 * @rdev: radeon_device pointer
2182 * @fence: radeon fence object
2183 *
2184 * Emits a fence sequence number on the compute ring and flushes
2185 * GPU caches.
2186 */
2187void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2188 struct radeon_fence *fence)
2189{
2190 struct radeon_ring *ring = &rdev->ring[fence->ring];
2191 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2192
2193 /* RELEASE_MEM - flush caches, send int */
2194 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2195 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2196 EOP_TC_ACTION_EN |
2197 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2198 EVENT_INDEX(5)));
2199 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2200 radeon_ring_write(ring, addr & 0xfffffffc);
2201 radeon_ring_write(ring, upper_32_bits(addr));
2202 radeon_ring_write(ring, fence->seq);
2203 radeon_ring_write(ring, 0);
2204 /* HDP flush */
2205 /* We should be using the new WAIT_REG_MEM special op packet here
2206 * but it causes the CP to hang
2207 */
2208 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2209 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2210 WRITE_DATA_DST_SEL(0)));
2211 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2212 radeon_ring_write(ring, 0);
2213 radeon_ring_write(ring, 0);
2214}
2215
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002216void cik_semaphore_ring_emit(struct radeon_device *rdev,
2217 struct radeon_ring *ring,
2218 struct radeon_semaphore *semaphore,
2219 bool emit_wait)
2220{
2221 uint64_t addr = semaphore->gpu_addr;
2222 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2223
2224 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2225 radeon_ring_write(ring, addr & 0xffffffff);
2226 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2227}
2228
2229/*
2230 * IB stuff
2231 */
2232/**
2233 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2234 *
2235 * @rdev: radeon_device pointer
2236 * @ib: radeon indirect buffer object
2237 *
2238 * Emits a DE (drawing engine) or CE (constant engine) IB
2239 * on the gfx ring. IBs are usually generated by userspace
2240 * acceleration drivers and submitted to the kernel for
2241 * scheduling on the ring. This function schedules the IB
2242 * on the gfx ring for execution by the GPU.
2243 */
2244void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2245{
2246 struct radeon_ring *ring = &rdev->ring[ib->ring];
2247 u32 header, control = INDIRECT_BUFFER_VALID;
2248
2249 if (ib->is_const_ib) {
2250 /* set switch buffer packet before const IB */
2251 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2252 radeon_ring_write(ring, 0);
2253
2254 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2255 } else {
2256 u32 next_rptr;
2257 if (ring->rptr_save_reg) {
2258 next_rptr = ring->wptr + 3 + 4;
2259 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2260 radeon_ring_write(ring, ((ring->rptr_save_reg -
2261 PACKET3_SET_UCONFIG_REG_START) >> 2));
2262 radeon_ring_write(ring, next_rptr);
2263 } else if (rdev->wb.enabled) {
2264 next_rptr = ring->wptr + 5 + 4;
2265 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2266 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2267 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2268 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2269 radeon_ring_write(ring, next_rptr);
2270 }
2271
2272 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2273 }
2274
2275 control |= ib->length_dw |
2276 (ib->vm ? (ib->vm->id << 24) : 0);
2277
2278 radeon_ring_write(ring, header);
2279 radeon_ring_write(ring,
2280#ifdef __BIG_ENDIAN
2281 (2 << 0) |
2282#endif
2283 (ib->gpu_addr & 0xFFFFFFFC));
2284 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2285 radeon_ring_write(ring, control);
2286}
2287
Alex Deucherfbc832c2012-07-20 14:41:35 -04002288/**
2289 * cik_ib_test - basic gfx ring IB test
2290 *
2291 * @rdev: radeon_device pointer
2292 * @ring: radeon_ring structure holding ring information
2293 *
2294 * Allocate an IB and execute it on the gfx ring (CIK).
2295 * Provides a basic gfx ring test to verify that IBs are working.
2296 * Returns 0 on success, error on failure.
2297 */
2298int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2299{
2300 struct radeon_ib ib;
2301 uint32_t scratch;
2302 uint32_t tmp = 0;
2303 unsigned i;
2304 int r;
2305
2306 r = radeon_scratch_get(rdev, &scratch);
2307 if (r) {
2308 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2309 return r;
2310 }
2311 WREG32(scratch, 0xCAFEDEAD);
2312 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2313 if (r) {
2314 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2315 return r;
2316 }
2317 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2318 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2319 ib.ptr[2] = 0xDEADBEEF;
2320 ib.length_dw = 3;
2321 r = radeon_ib_schedule(rdev, &ib, NULL);
2322 if (r) {
2323 radeon_scratch_free(rdev, scratch);
2324 radeon_ib_free(rdev, &ib);
2325 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2326 return r;
2327 }
2328 r = radeon_fence_wait(ib.fence, false);
2329 if (r) {
2330 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2331 return r;
2332 }
2333 for (i = 0; i < rdev->usec_timeout; i++) {
2334 tmp = RREG32(scratch);
2335 if (tmp == 0xDEADBEEF)
2336 break;
2337 DRM_UDELAY(1);
2338 }
2339 if (i < rdev->usec_timeout) {
2340 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2341 } else {
2342 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2343 scratch, tmp);
2344 r = -EINVAL;
2345 }
2346 radeon_scratch_free(rdev, scratch);
2347 radeon_ib_free(rdev, &ib);
2348 return r;
2349}
2350
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002351/*
Alex Deucher841cf442012-12-18 21:47:44 -05002352 * CP.
2353 * On CIK, gfx and compute now have independent command processors.
2354 *
2355 * GFX
2356 * Gfx consists of a single ring and can process both gfx jobs and
2357 * compute jobs. The gfx CP consists of three microengines (ME):
2358 * PFP - Pre-Fetch Parser
2359 * ME - Micro Engine
2360 * CE - Constant Engine
2361 * The PFP and ME make up what is considered the Drawing Engine (DE).
2362 * The CE is an asynchronous engine used for updating buffer descriptors
2363 * used by the DE so that they can be loaded into cache in parallel
2364 * while the DE is processing state update packets.
2365 *
2366 * Compute
2367 * The compute CP consists of two microengines (ME):
2368 * MEC1 - Compute MicroEngine 1
2369 * MEC2 - Compute MicroEngine 2
2370 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2371 * The queues are exposed to userspace and are programmed directly
2372 * by the compute runtime.
2373 */
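/*
 * Worked example (sketch): with 4 pipes per MEC and 8 queues per pipe,
 * a single-MEC part exposes 4 * 8 = 32 compute queues, and a two-MEC
 * part such as Kaveri exposes 64; cik_mec_init() below derives
 * num_queue the same way.
 */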
2374/**
2375 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2376 *
2377 * @rdev: radeon_device pointer
2378 * @enable: enable or disable the MEs
2379 *
2380 * Halts or unhalts the gfx MEs.
2381 */
2382static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2383{
2384 if (enable)
2385 WREG32(CP_ME_CNTL, 0);
2386 else {
2387 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2388 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2389 }
2390 udelay(50);
2391}
2392
2393/**
2394 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2395 *
2396 * @rdev: radeon_device pointer
2397 *
2398 * Loads the gfx PFP, ME, and CE ucode.
2399 * Returns 0 for success, -EINVAL if the ucode is not available.
2400 */
2401static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2402{
2403 const __be32 *fw_data;
2404 int i;
2405
2406 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2407 return -EINVAL;
2408
2409 cik_cp_gfx_enable(rdev, false);
2410
2411 /* PFP */
2412 fw_data = (const __be32 *)rdev->pfp_fw->data;
2413 WREG32(CP_PFP_UCODE_ADDR, 0);
2414 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2415 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2416 WREG32(CP_PFP_UCODE_ADDR, 0);
2417
2418 /* CE */
2419 fw_data = (const __be32 *)rdev->ce_fw->data;
2420 WREG32(CP_CE_UCODE_ADDR, 0);
2421 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2422 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2423 WREG32(CP_CE_UCODE_ADDR, 0);
2424
2425 /* ME */
2426 fw_data = (const __be32 *)rdev->me_fw->data;
2427 WREG32(CP_ME_RAM_WADDR, 0);
2428 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2429 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2430 WREG32(CP_ME_RAM_WADDR, 0);
2431
2432 WREG32(CP_PFP_UCODE_ADDR, 0);
2433 WREG32(CP_CE_UCODE_ADDR, 0);
2434 WREG32(CP_ME_RAM_WADDR, 0);
2435 WREG32(CP_ME_RAM_RADDR, 0);
2436 return 0;
2437}
2438
2439/**
2440 * cik_cp_gfx_start - start the gfx ring
2441 *
2442 * @rdev: radeon_device pointer
2443 *
2444 * Enables the ring and loads the clear state context and other
2445 * packets required to init the ring.
2446 * Returns 0 for success, error for failure.
2447 */
2448static int cik_cp_gfx_start(struct radeon_device *rdev)
2449{
2450 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2451 int r, i;
2452
2453 /* init the CP */
2454 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2455 WREG32(CP_ENDIAN_SWAP, 0);
2456 WREG32(CP_DEVICE_ID, 1);
2457
2458 cik_cp_gfx_enable(rdev, true);
2459
2460 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2461 if (r) {
2462 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2463 return r;
2464 }
2465
2466 /* init the CE partitions. CE only used for gfx on CIK */
2467 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2468 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2469 radeon_ring_write(ring, 0xc000);
2470 radeon_ring_write(ring, 0xc000);
2471
2472 /* setup clear context state */
2473 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2474 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2475
2476 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2477 radeon_ring_write(ring, 0x80000000);
2478 radeon_ring_write(ring, 0x80000000);
2479
2480 for (i = 0; i < cik_default_size; i++)
2481 radeon_ring_write(ring, cik_default_state[i]);
2482
2483 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2484 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2485
2486 /* set clear context state */
2487 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2488 radeon_ring_write(ring, 0);
2489
2490 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2491 radeon_ring_write(ring, 0x00000316);
2492 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2493 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2494
2495 radeon_ring_unlock_commit(rdev, ring);
2496
2497 return 0;
2498}
2499
2500/**
2501 * cik_cp_gfx_fini - stop the gfx ring
2502 *
2503 * @rdev: radeon_device pointer
2504 *
2505 * Stop the gfx ring and tear down the driver ring
2506 * info.
2507 */
2508static void cik_cp_gfx_fini(struct radeon_device *rdev)
2509{
2510 cik_cp_gfx_enable(rdev, false);
2511 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2512}
2513
2514/**
2515 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2516 *
2517 * @rdev: radeon_device pointer
2518 *
2519 * Program the location and size of the gfx ring buffer
2520 * and test it to make sure it's working.
2521 * Returns 0 for success, error for failure.
2522 */
2523static int cik_cp_gfx_resume(struct radeon_device *rdev)
2524{
2525 struct radeon_ring *ring;
2526 u32 tmp;
2527 u32 rb_bufsz;
2528 u64 rb_addr;
2529 int r;
2530
2531 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2532 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2533
2534 /* Set the write pointer delay */
2535 WREG32(CP_RB_WPTR_DELAY, 0);
2536
2537 /* set the RB to use vmid 0 */
2538 WREG32(CP_RB_VMID, 0);
2539
2540 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2541
2542 /* ring 0 - compute and gfx */
2543 /* Set ring buffer size */
2544 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2545 rb_bufsz = drm_order(ring->ring_size / 8);
2546 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2547#ifdef __BIG_ENDIAN
2548 tmp |= BUF_SWAP_32BIT;
2549#endif
2550 WREG32(CP_RB0_CNTL, tmp);
2551
2552 /* Initialize the ring buffer's read and write pointers */
2553 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2554 ring->wptr = 0;
2555 WREG32(CP_RB0_WPTR, ring->wptr);
2556
2557	/* set the wb address whether it's enabled or not */
2558 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2559 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2560
2561 /* scratch register shadowing is no longer supported */
2562 WREG32(SCRATCH_UMSK, 0);
2563
2564 if (!rdev->wb.enabled)
2565 tmp |= RB_NO_UPDATE;
2566
2567 mdelay(1);
2568 WREG32(CP_RB0_CNTL, tmp);
2569
2570 rb_addr = ring->gpu_addr >> 8;
2571 WREG32(CP_RB0_BASE, rb_addr);
2572 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2573
2574 ring->rptr = RREG32(CP_RB0_RPTR);
2575
2576 /* start the ring */
2577 cik_cp_gfx_start(rdev);
2578 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2579 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2580 if (r) {
2581 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2582 return r;
2583 }
2584 return 0;
2585}
2586
Alex Deucher963e81f2013-06-26 17:37:11 -04002587u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2588 struct radeon_ring *ring)
2589{
2590 u32 rptr;
2591
2594 if (rdev->wb.enabled) {
2595 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2596 } else {
2597 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2598 rptr = RREG32(CP_HQD_PQ_RPTR);
2599 cik_srbm_select(rdev, 0, 0, 0, 0);
2600 }
2601 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2602
2603 return rptr;
2604}
2605
2606u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2607 struct radeon_ring *ring)
2608{
2609 u32 wptr;
2610
2611 if (rdev->wb.enabled) {
2612 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2613 } else {
2614 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2615 wptr = RREG32(CP_HQD_PQ_WPTR);
2616 cik_srbm_select(rdev, 0, 0, 0, 0);
2617 }
2618 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2619
2620 return wptr;
2621}
2622
2623void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2624 struct radeon_ring *ring)
2625{
2626 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2627
2628 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2629 WDOORBELL32(ring->doorbell_offset, wptr);
2630}
2631
Alex Deucher841cf442012-12-18 21:47:44 -05002632/**
2633 * cik_cp_compute_enable - enable/disable the compute CP MEs
2634 *
2635 * @rdev: radeon_device pointer
2636 * @enable: enable or disable the MEs
2637 *
2638 * Halts or unhalts the compute MEs.
2639 */
2640static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2641{
2642 if (enable)
2643 WREG32(CP_MEC_CNTL, 0);
2644 else
2645 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2646 udelay(50);
2647}
2648
2649/**
2650 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2651 *
2652 * @rdev: radeon_device pointer
2653 *
2654 * Loads the compute MEC1&2 ucode.
2655 * Returns 0 for success, -EINVAL if the ucode is not available.
2656 */
2657static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2658{
2659 const __be32 *fw_data;
2660 int i;
2661
2662 if (!rdev->mec_fw)
2663 return -EINVAL;
2664
2665 cik_cp_compute_enable(rdev, false);
2666
2667 /* MEC1 */
2668 fw_data = (const __be32 *)rdev->mec_fw->data;
2669 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2670 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2671 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2672 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2673
2674 if (rdev->family == CHIP_KAVERI) {
2675 /* MEC2 */
2676 fw_data = (const __be32 *)rdev->mec_fw->data;
2677 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2678 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2679 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2680 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2681 }
2682
2683 return 0;
2684}
2685
2686/**
2687 * cik_cp_compute_start - start the compute queues
2688 *
2689 * @rdev: radeon_device pointer
2690 *
2691 * Enable the compute queues.
2692 * Returns 0 for success, error for failure.
2693 */
2694static int cik_cp_compute_start(struct radeon_device *rdev)
2695{
Alex Deucher963e81f2013-06-26 17:37:11 -04002696 cik_cp_compute_enable(rdev, true);
2697
Alex Deucher841cf442012-12-18 21:47:44 -05002698 return 0;
2699}
2700
2701/**
2702 * cik_cp_compute_fini - stop the compute queues
2703 *
2704 * @rdev: radeon_device pointer
2705 *
2706 * Stop the compute queues and tear down the driver queue
2707 * info.
2708 */
2709static void cik_cp_compute_fini(struct radeon_device *rdev)
2710{
Alex Deucher963e81f2013-06-26 17:37:11 -04002711 int i, idx, r;
2712
Alex Deucher841cf442012-12-18 21:47:44 -05002713 cik_cp_compute_enable(rdev, false);
Alex Deucher963e81f2013-06-26 17:37:11 -04002714
2715 for (i = 0; i < 2; i++) {
2716 if (i == 0)
2717 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2718 else
2719 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2720
2721 if (rdev->ring[idx].mqd_obj) {
2722 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2723 if (unlikely(r != 0))
2724 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2725
2726 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2727 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2728
2729 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2730 rdev->ring[idx].mqd_obj = NULL;
2731 }
2732 }
Alex Deucher841cf442012-12-18 21:47:44 -05002733}
2734
Alex Deucher963e81f2013-06-26 17:37:11 -04002735static void cik_mec_fini(struct radeon_device *rdev)
2736{
2737 int r;
2738
2739 if (rdev->mec.hpd_eop_obj) {
2740 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2741 if (unlikely(r != 0))
2742 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2743 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2744 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2745
2746 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2747 rdev->mec.hpd_eop_obj = NULL;
2748 }
2749}
2750
2751#define MEC_HPD_SIZE 2048
2752
2753static int cik_mec_init(struct radeon_device *rdev)
2754{
2755 int r;
2756 u32 *hpd;
2757
2758 /*
2759 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2760 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2761 */
2762 if (rdev->family == CHIP_KAVERI)
2763 rdev->mec.num_mec = 2;
2764 else
2765 rdev->mec.num_mec = 1;
2766 rdev->mec.num_pipe = 4;
2767 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2768
2769 if (rdev->mec.hpd_eop_obj == NULL) {
2770 r = radeon_bo_create(rdev,
2771 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2772 PAGE_SIZE, true,
2773 RADEON_GEM_DOMAIN_GTT, NULL,
2774 &rdev->mec.hpd_eop_obj);
2775 if (r) {
2776			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2777 return r;
2778 }
2779 }
2780
2781 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2782 if (unlikely(r != 0)) {
2783 cik_mec_fini(rdev);
2784 return r;
2785 }
2786 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2787 &rdev->mec.hpd_eop_gpu_addr);
2788 if (r) {
2789		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2790 cik_mec_fini(rdev);
2791 return r;
2792 }
2793 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2794 if (r) {
2795		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2796 cik_mec_fini(rdev);
2797 return r;
2798 }
2799
2800 /* clear memory. Not sure if this is required or not */
2801 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2802
2803 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2804 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2805
2806 return 0;
2807}
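/*
 * Sizing example (sketch), using MEC_HPD_SIZE and the pipe counts set
 * above: a Kaveri part allocates 2 MECs * 4 pipes * 2048 bytes * 2 =
 * 32768 bytes (8 pages) for the HPD EOP buffer, while Bonaire/Kabini
 * parts allocate half of that.
 */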
2808
2809struct hqd_registers
2810{
2811 u32 cp_mqd_base_addr;
2812 u32 cp_mqd_base_addr_hi;
2813 u32 cp_hqd_active;
2814 u32 cp_hqd_vmid;
2815 u32 cp_hqd_persistent_state;
2816 u32 cp_hqd_pipe_priority;
2817 u32 cp_hqd_queue_priority;
2818 u32 cp_hqd_quantum;
2819 u32 cp_hqd_pq_base;
2820 u32 cp_hqd_pq_base_hi;
2821 u32 cp_hqd_pq_rptr;
2822 u32 cp_hqd_pq_rptr_report_addr;
2823 u32 cp_hqd_pq_rptr_report_addr_hi;
2824 u32 cp_hqd_pq_wptr_poll_addr;
2825 u32 cp_hqd_pq_wptr_poll_addr_hi;
2826 u32 cp_hqd_pq_doorbell_control;
2827 u32 cp_hqd_pq_wptr;
2828 u32 cp_hqd_pq_control;
2829 u32 cp_hqd_ib_base_addr;
2830 u32 cp_hqd_ib_base_addr_hi;
2831 u32 cp_hqd_ib_rptr;
2832 u32 cp_hqd_ib_control;
2833 u32 cp_hqd_iq_timer;
2834 u32 cp_hqd_iq_rptr;
2835 u32 cp_hqd_dequeue_request;
2836 u32 cp_hqd_dma_offload;
2837 u32 cp_hqd_sema_cmd;
2838 u32 cp_hqd_msg_type;
2839 u32 cp_hqd_atomic0_preop_lo;
2840 u32 cp_hqd_atomic0_preop_hi;
2841 u32 cp_hqd_atomic1_preop_lo;
2842 u32 cp_hqd_atomic1_preop_hi;
2843 u32 cp_hqd_hq_scheduler0;
2844 u32 cp_hqd_hq_scheduler1;
2845 u32 cp_mqd_control;
2846};
2847
2848struct bonaire_mqd
2849{
2850 u32 header;
2851 u32 dispatch_initiator;
2852 u32 dimensions[3];
2853 u32 start_idx[3];
2854 u32 num_threads[3];
2855 u32 pipeline_stat_enable;
2856 u32 perf_counter_enable;
2857 u32 pgm[2];
2858 u32 tba[2];
2859 u32 tma[2];
2860 u32 pgm_rsrc[2];
2861 u32 vmid;
2862 u32 resource_limits;
2863 u32 static_thread_mgmt01[2];
2864 u32 tmp_ring_size;
2865 u32 static_thread_mgmt23[2];
2866 u32 restart[3];
2867 u32 thread_trace_enable;
2868 u32 reserved1;
2869 u32 user_data[16];
2870 u32 vgtcs_invoke_count[2];
2871 struct hqd_registers queue_state;
2872 u32 dequeue_cntr;
2873 u32 interrupt_queue[64];
2874};
2875
Alex Deucher841cf442012-12-18 21:47:44 -05002876/**
2877 * cik_cp_compute_resume - setup the compute queue registers
2878 *
2879 * @rdev: radeon_device pointer
2880 *
2881 * Program the compute queues and test them to make sure they
2882 * are working.
2883 * Returns 0 for success, error for failure.
2884 */
2885static int cik_cp_compute_resume(struct radeon_device *rdev)
2886{
Alex Deucher963e81f2013-06-26 17:37:11 -04002887 int r, i, idx;
2888 u32 tmp;
2889 bool use_doorbell = true;
2890 u64 hqd_gpu_addr;
2891 u64 mqd_gpu_addr;
2892 u64 eop_gpu_addr;
2893 u64 wb_gpu_addr;
2894 u32 *buf;
2895 struct bonaire_mqd *mqd;
Alex Deucher841cf442012-12-18 21:47:44 -05002896
Alex Deucher841cf442012-12-18 21:47:44 -05002897 r = cik_cp_compute_start(rdev);
2898 if (r)
2899 return r;
Alex Deucher963e81f2013-06-26 17:37:11 -04002900
2901 /* fix up chicken bits */
2902 tmp = RREG32(CP_CPF_DEBUG);
2903 tmp |= (1 << 23);
2904 WREG32(CP_CPF_DEBUG, tmp);
2905
2906 /* init the pipes */
2907 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2908 int me = (i < 4) ? 1 : 2;
2909 int pipe = (i < 4) ? i : (i - 4);
2910
2911 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2912
2913 cik_srbm_select(rdev, me, pipe, 0, 0);
2914
2915 /* write the EOP addr */
2916 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2917 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2918
2919 /* set the VMID assigned */
2920 WREG32(CP_HPD_EOP_VMID, 0);
2921
2922 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2923 tmp = RREG32(CP_HPD_EOP_CONTROL);
2924 tmp &= ~EOP_SIZE_MASK;
2925 tmp |= drm_order(MEC_HPD_SIZE / 8);
2926 WREG32(CP_HPD_EOP_CONTROL, tmp);
2927 }
2928 cik_srbm_select(rdev, 0, 0, 0, 0);
2929
2930 /* init the queues. Just two for now. */
2931 for (i = 0; i < 2; i++) {
2932 if (i == 0)
2933 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2934 else
2935 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2936
2937 if (rdev->ring[idx].mqd_obj == NULL) {
2938 r = radeon_bo_create(rdev,
2939 sizeof(struct bonaire_mqd),
2940 PAGE_SIZE, true,
2941 RADEON_GEM_DOMAIN_GTT, NULL,
2942 &rdev->ring[idx].mqd_obj);
2943 if (r) {
2944 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2945 return r;
2946 }
2947 }
2948
2949 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2950 if (unlikely(r != 0)) {
2951 cik_cp_compute_fini(rdev);
2952 return r;
2953 }
2954 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2955 &mqd_gpu_addr);
2956 if (r) {
2957 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2958 cik_cp_compute_fini(rdev);
2959 return r;
2960 }
2961 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2962 if (r) {
2963 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2964 cik_cp_compute_fini(rdev);
2965 return r;
2966 }
2967
2968 /* doorbell offset */
2969 rdev->ring[idx].doorbell_offset =
2970 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2971
2972 /* init the mqd struct */
2973 memset(buf, 0, sizeof(struct bonaire_mqd));
2974
2975 mqd = (struct bonaire_mqd *)buf;
2976 mqd->header = 0xC0310800;
2977 mqd->static_thread_mgmt01[0] = 0xffffffff;
2978 mqd->static_thread_mgmt01[1] = 0xffffffff;
2979 mqd->static_thread_mgmt23[0] = 0xffffffff;
2980 mqd->static_thread_mgmt23[1] = 0xffffffff;
2981
2982 cik_srbm_select(rdev, rdev->ring[idx].me,
2983 rdev->ring[idx].pipe,
2984 rdev->ring[idx].queue, 0);
2985
2986 /* disable wptr polling */
2987 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2988 tmp &= ~WPTR_POLL_EN;
2989 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2990
2991 /* enable doorbell? */
2992 mqd->queue_state.cp_hqd_pq_doorbell_control =
2993 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2994 if (use_doorbell)
2995 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2996 else
2997 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2998 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2999 mqd->queue_state.cp_hqd_pq_doorbell_control);
3000
3001 /* disable the queue if it's active */
3002 mqd->queue_state.cp_hqd_dequeue_request = 0;
3003 mqd->queue_state.cp_hqd_pq_rptr = 0;
3004		mqd->queue_state.cp_hqd_pq_wptr = 0;
3005 if (RREG32(CP_HQD_ACTIVE) & 1) {
3006 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3007 for (i = 0; i < rdev->usec_timeout; i++) {
3008 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3009 break;
3010 udelay(1);
3011 }
3012 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3013 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3014 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3015 }
3016
3017 /* set the pointer to the MQD */
3018 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3019 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3020 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3021 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3022 /* set MQD vmid to 0 */
3023 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3024 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3025 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3026
3027		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3028 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3029 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3030 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3031 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3032 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3033
3034 /* set up the HQD, this is similar to CP_RB0_CNTL */
3035 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3036 mqd->queue_state.cp_hqd_pq_control &=
3037 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3038
3039 mqd->queue_state.cp_hqd_pq_control |=
3040 drm_order(rdev->ring[idx].ring_size / 8);
3041 mqd->queue_state.cp_hqd_pq_control |=
3042 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3043#ifdef __BIG_ENDIAN
3044 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3045#endif
3046 mqd->queue_state.cp_hqd_pq_control &=
3047 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3048 mqd->queue_state.cp_hqd_pq_control |=
3049 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3050 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3051
3052 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3053 if (i == 0)
3054 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3055 else
3056 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3057 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3058 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3059 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3060 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3061 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3062
3063		/* set the wb address whether it's enabled or not */
3064 if (i == 0)
3065 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3066 else
3067 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3068 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3069 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3070 upper_32_bits(wb_gpu_addr) & 0xffff;
3071 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3072 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3073 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3074 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3075
3076 /* enable the doorbell if requested */
3077 if (use_doorbell) {
3078 mqd->queue_state.cp_hqd_pq_doorbell_control =
3079 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3080 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3081 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3082 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3083 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3084 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3085 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3086
3087 } else {
3088 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3089 }
3090 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3091 mqd->queue_state.cp_hqd_pq_doorbell_control);
3092
3093 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3094 rdev->ring[idx].wptr = 0;
3095 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3096 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3097 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3098 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3099
3100 /* set the vmid for the queue */
3101 mqd->queue_state.cp_hqd_vmid = 0;
3102 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3103
3104 /* activate the queue */
3105 mqd->queue_state.cp_hqd_active = 1;
3106 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3107
3108 cik_srbm_select(rdev, 0, 0, 0, 0);
3109
3110 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3111 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3112
3113 rdev->ring[idx].ready = true;
3114 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3115 if (r)
3116 rdev->ring[idx].ready = false;
3117 }
3118
Alex Deucher841cf442012-12-18 21:47:44 -05003119 return 0;
3120}
3121
Alex Deucher841cf442012-12-18 21:47:44 -05003122static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3123{
3124 cik_cp_gfx_enable(rdev, enable);
3125 cik_cp_compute_enable(rdev, enable);
3126}
3127
Alex Deucher841cf442012-12-18 21:47:44 -05003128static int cik_cp_load_microcode(struct radeon_device *rdev)
3129{
3130 int r;
3131
3132 r = cik_cp_gfx_load_microcode(rdev);
3133 if (r)
3134 return r;
3135 r = cik_cp_compute_load_microcode(rdev);
3136 if (r)
3137 return r;
3138
3139 return 0;
3140}
3141
Alex Deucher841cf442012-12-18 21:47:44 -05003142static void cik_cp_fini(struct radeon_device *rdev)
3143{
3144 cik_cp_gfx_fini(rdev);
3145 cik_cp_compute_fini(rdev);
3146}
3147
Alex Deucher841cf442012-12-18 21:47:44 -05003148static int cik_cp_resume(struct radeon_device *rdev)
3149{
3150 int r;
3151
3152 /* Reset all cp blocks */
3153 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3154 RREG32(GRBM_SOFT_RESET);
3155 mdelay(15);
3156 WREG32(GRBM_SOFT_RESET, 0);
3157 RREG32(GRBM_SOFT_RESET);
3158
3159 r = cik_cp_load_microcode(rdev);
3160 if (r)
3161 return r;
3162
3163 r = cik_cp_gfx_resume(rdev);
3164 if (r)
3165 return r;
3166 r = cik_cp_compute_resume(rdev);
3167 if (r)
3168 return r;
3169
3170 return 0;
3171}
3172
Alex Deucher21a93e12013-04-09 12:47:11 -04003173/*
3174 * sDMA - System DMA
3175 * Starting with CIK, the GPU has new asynchronous
3176 * DMA engines. These engines are used for compute
3177 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3178 * and each one supports 1 ring buffer used for gfx
3179 * and 2 queues used for compute.
3180 *
3181 * The programming model is very similar to the CP
3182 * (ring buffer, IBs, etc.), but sDMA has it's own
3183 * packet format that is different from the PM4 format
3184 * used by the CP. sDMA supports copying data, writing
3185 * embedded data, solid fills, and a number of other
3186 * things. It also has support for tiling/detiling of
3187 * buffers.
3188 */
3189/**
3190 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3191 *
3192 * @rdev: radeon_device pointer
3193 * @ib: IB object to schedule
3194 *
3195 * Schedule an IB in the DMA ring (CIK).
3196 */
3197void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3198 struct radeon_ib *ib)
3199{
3200 struct radeon_ring *ring = &rdev->ring[ib->ring];
3201 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3202
3203 if (rdev->wb.enabled) {
3204 u32 next_rptr = ring->wptr + 5;
3205 while ((next_rptr & 7) != 4)
3206 next_rptr++;
3207 next_rptr += 4;
3208 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3209 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3210 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3211 radeon_ring_write(ring, 1); /* number of DWs to follow */
3212 radeon_ring_write(ring, next_rptr);
3213 }
3214
3215	/* IB packet must end on an 8 DW boundary */
3216 while ((ring->wptr & 7) != 4)
3217 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3218 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3219 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3220 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3221 radeon_ring_write(ring, ib->length_dw);
3222
3223}
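/*
 * Illustrative sketch only: the NOP padding loop above advances the
 * write pointer to the next value congruent to 4 mod 8, so that the
 * 4-dword INDIRECT_BUFFER packet which follows ends on an 8 DW
 * boundary.  A closed form for the padded pointer (hypothetical helper,
 * not used by the driver) would be:
 */
static inline u32 cik_sdma_pad_to_ib_boundary(u32 wptr)
{
	return wptr + ((4 - (wptr & 7)) & 7);
}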
3224
3225/**
3226 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3227 *
3228 * @rdev: radeon_device pointer
3229 * @fence: radeon fence object
3230 *
3231 * Add a DMA fence packet to the ring to write
3232 * the fence seq number and DMA trap packet to generate
3233 * an interrupt if needed (CIK).
3234 */
3235void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3236 struct radeon_fence *fence)
3237{
3238 struct radeon_ring *ring = &rdev->ring[fence->ring];
3239 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3240 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3241 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3242 u32 ref_and_mask;
3243
3244 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3245 ref_and_mask = SDMA0;
3246 else
3247 ref_and_mask = SDMA1;
3248
3249 /* write the fence */
3250 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3251 radeon_ring_write(ring, addr & 0xffffffff);
3252 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3253 radeon_ring_write(ring, fence->seq);
3254 /* generate an interrupt */
3255 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3256 /* flush HDP */
3257 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3258 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3259 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3260 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3261 radeon_ring_write(ring, ref_and_mask); /* MASK */
3262 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3263}
3264
3265/**
3266 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3267 *
3268 * @rdev: radeon_device pointer
3269 * @ring: radeon_ring structure holding ring information
3270 * @semaphore: radeon semaphore object
3271 * @emit_wait: wait or signal semaphore
3272 *
3273 * Add a DMA semaphore packet to the ring to wait on or signal
3274 * other rings (CIK).
3275 */
3276void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3277 struct radeon_ring *ring,
3278 struct radeon_semaphore *semaphore,
3279 bool emit_wait)
3280{
3281 u64 addr = semaphore->gpu_addr;
3282 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3283
3284 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3285 radeon_ring_write(ring, addr & 0xfffffff8);
3286 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3287}
3288
3289/**
3290 * cik_sdma_gfx_stop - stop the gfx async dma engines
3291 *
3292 * @rdev: radeon_device pointer
3293 *
3294 * Stop the gfx async dma ring buffers (CIK).
3295 */
3296static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3297{
3298 u32 rb_cntl, reg_offset;
3299 int i;
3300
3301 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3302
3303 for (i = 0; i < 2; i++) {
3304 if (i == 0)
3305 reg_offset = SDMA0_REGISTER_OFFSET;
3306 else
3307 reg_offset = SDMA1_REGISTER_OFFSET;
3308 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3309 rb_cntl &= ~SDMA_RB_ENABLE;
3310 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3311 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3312 }
3313}
3314
3315/**
3316 * cik_sdma_rlc_stop - stop the compute async dma engines
3317 *
3318 * @rdev: radeon_device pointer
3319 *
3320 * Stop the compute async dma queues (CIK).
3321 */
3322static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3323{
3324 /* XXX todo */
3325}
3326
3327/**
3328 * cik_sdma_enable - enable or disable the async dma engines
3329 *
3330 * @rdev: radeon_device pointer
3331 * @enable: enable/disable the DMA MEs.
3332 *
3333 * Halt or unhalt the async dma engines (CIK).
3334 */
3335static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3336{
3337 u32 me_cntl, reg_offset;
3338 int i;
3339
3340 for (i = 0; i < 2; i++) {
3341 if (i == 0)
3342 reg_offset = SDMA0_REGISTER_OFFSET;
3343 else
3344 reg_offset = SDMA1_REGISTER_OFFSET;
3345 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3346 if (enable)
3347 me_cntl &= ~SDMA_HALT;
3348 else
3349 me_cntl |= SDMA_HALT;
3350 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3351 }
3352}
3353
3354/**
3355 * cik_sdma_gfx_resume - setup and start the async dma engines
3356 *
3357 * @rdev: radeon_device pointer
3358 *
3359 * Set up the gfx DMA ring buffers and enable them (CIK).
3360 * Returns 0 for success, error for failure.
3361 */
3362static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3363{
3364 struct radeon_ring *ring;
3365 u32 rb_cntl, ib_cntl;
3366 u32 rb_bufsz;
3367 u32 reg_offset, wb_offset;
3368 int i, r;
3369
3370 for (i = 0; i < 2; i++) {
3371 if (i == 0) {
3372 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3373 reg_offset = SDMA0_REGISTER_OFFSET;
3374 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3375 } else {
3376 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3377 reg_offset = SDMA1_REGISTER_OFFSET;
3378 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3379 }
3380
3381 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3382 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3383
3384 /* Set ring buffer size in dwords */
3385 rb_bufsz = drm_order(ring->ring_size / 4);
3386 rb_cntl = rb_bufsz << 1;
3387#ifdef __BIG_ENDIAN
3388 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3389#endif
3390 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3391
3392 /* Initialize the ring buffer's read and write pointers */
3393 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3394 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3395
3396 /* set the wb address whether it's enabled or not */
3397 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3398 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3399 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3400 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3401
3402 if (rdev->wb.enabled)
3403 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3404
3405 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3406 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3407
3408 ring->wptr = 0;
3409 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3410
3411 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3412
3413 /* enable DMA RB */
3414 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3415
3416 ib_cntl = SDMA_IB_ENABLE;
3417#ifdef __BIG_ENDIAN
3418 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3419#endif
3420 /* enable DMA IBs */
3421 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3422
3423 ring->ready = true;
3424
3425 r = radeon_ring_test(rdev, ring->idx, ring);
3426 if (r) {
3427 ring->ready = false;
3428 return r;
3429 }
3430 }
3431
3432 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3433
3434 return 0;
3435}
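
/*
 * Worked example (illustration only, not part of the driver): the RB_CNTL
 * size field above holds log2 of the ring size in dwords.  Assuming a
 * 256 KiB ring (size picked purely for the arithmetic), ring_size / 4 =
 * 65536 dwords, drm_order(65536) = 16, and rb_cntl starts out as
 * 16 << 1 = 0x20 before the endian-swap and writeback bits are OR'ed in.
 */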
3436
3437/**
3438 * cik_sdma_rlc_resume - setup and start the async dma engines
3439 *
3440 * @rdev: radeon_device pointer
3441 *
3442 * Set up the compute DMA queues and enable them (CIK).
3443 * Returns 0 for success, error for failure.
3444 */
3445static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3446{
3447 /* XXX todo */
3448 return 0;
3449}
3450
3451/**
3452 * cik_sdma_load_microcode - load the sDMA ME ucode
3453 *
3454 * @rdev: radeon_device pointer
3455 *
3456 * Loads the sDMA0/1 ucode.
3457 * Returns 0 for success, -EINVAL if the ucode is not available.
3458 */
3459static int cik_sdma_load_microcode(struct radeon_device *rdev)
3460{
3461 const __be32 *fw_data;
3462 int i;
3463
3464 if (!rdev->sdma_fw)
3465 return -EINVAL;
3466
3467 /* stop the gfx rings and rlc compute queues */
3468 cik_sdma_gfx_stop(rdev);
3469 cik_sdma_rlc_stop(rdev);
3470
3471 /* halt the MEs */
3472 cik_sdma_enable(rdev, false);
3473
3474 /* sdma0 */
3475 fw_data = (const __be32 *)rdev->sdma_fw->data;
3476 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3477 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3478 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3479 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3480
3481 /* sdma1 */
3482 fw_data = (const __be32 *)rdev->sdma_fw->data;
3483 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3484 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3485 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3486 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3487
3488 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3489 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3490 return 0;
3491}
3492
3493/**
3494 * cik_sdma_resume - setup and start the async dma engines
3495 *
3496 * @rdev: radeon_device pointer
3497 *
3498 * Set up the DMA engines and enable them (CIK).
3499 * Returns 0 for success, error for failure.
3500 */
3501static int cik_sdma_resume(struct radeon_device *rdev)
3502{
3503 int r;
3504
3505 /* Reset dma */
3506 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3507 RREG32(SRBM_SOFT_RESET);
3508 udelay(50);
3509 WREG32(SRBM_SOFT_RESET, 0);
3510 RREG32(SRBM_SOFT_RESET);
3511
3512 r = cik_sdma_load_microcode(rdev);
3513 if (r)
3514 return r;
3515
3516 /* unhalt the MEs */
3517 cik_sdma_enable(rdev, true);
3518
3519 /* start the gfx rings and rlc compute queues */
3520 r = cik_sdma_gfx_resume(rdev);
3521 if (r)
3522 return r;
3523 r = cik_sdma_rlc_resume(rdev);
3524 if (r)
3525 return r;
3526
3527 return 0;
3528}
3529
3530/**
3531 * cik_sdma_fini - tear down the async dma engines
3532 *
3533 * @rdev: radeon_device pointer
3534 *
3535 * Stop the async dma engines and free the rings (CIK).
3536 */
3537static void cik_sdma_fini(struct radeon_device *rdev)
3538{
3539 /* stop the gfx rings and rlc compute queues */
3540 cik_sdma_gfx_stop(rdev);
3541 cik_sdma_rlc_stop(rdev);
3542 /* halt the MEs */
3543 cik_sdma_enable(rdev, false);
3544 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3545 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3546 /* XXX - compute dma queue tear down */
3547}
3548
3549/**
3550 * cik_copy_dma - copy pages using the DMA engine
3551 *
3552 * @rdev: radeon_device pointer
3553 * @src_offset: src GPU address
3554 * @dst_offset: dst GPU address
3555 * @num_gpu_pages: number of GPU pages to xfer
3556 * @fence: radeon fence object
3557 *
3558 * Copy GPU pages using the DMA engine (CIK).
3559 * Used by the radeon ttm implementation to move pages if
3560 * registered as the asic copy callback.
3561 */
3562int cik_copy_dma(struct radeon_device *rdev,
3563 uint64_t src_offset, uint64_t dst_offset,
3564 unsigned num_gpu_pages,
3565 struct radeon_fence **fence)
3566{
3567 struct radeon_semaphore *sem = NULL;
3568 int ring_index = rdev->asic->copy.dma_ring_index;
3569 struct radeon_ring *ring = &rdev->ring[ring_index];
3570 u32 size_in_bytes, cur_size_in_bytes;
3571 int i, num_loops;
3572 int r = 0;
3573
3574 r = radeon_semaphore_create(rdev, &sem);
3575 if (r) {
3576 DRM_ERROR("radeon: moving bo (%d).\n", r);
3577 return r;
3578 }
3579
3580 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3581 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3582 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3583 if (r) {
3584 DRM_ERROR("radeon: moving bo (%d).\n", r);
3585 radeon_semaphore_free(rdev, &sem, NULL);
3586 return r;
3587 }
3588
3589 if (radeon_fence_need_sync(*fence, ring->idx)) {
3590 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3591 ring->idx);
3592 radeon_fence_note_sync(*fence, ring->idx);
3593 } else {
3594 radeon_semaphore_free(rdev, &sem, NULL);
3595 }
3596
3597 for (i = 0; i < num_loops; i++) {
3598 cur_size_in_bytes = size_in_bytes;
3599 if (cur_size_in_bytes > 0x1fffff)
3600 cur_size_in_bytes = 0x1fffff;
3601 size_in_bytes -= cur_size_in_bytes;
3602 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3603 radeon_ring_write(ring, cur_size_in_bytes);
3604 radeon_ring_write(ring, 0); /* src/dst endian swap */
3605 radeon_ring_write(ring, src_offset & 0xffffffff);
3606 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3607 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3608 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3609 src_offset += cur_size_in_bytes;
3610 dst_offset += cur_size_in_bytes;
3611 }
3612
3613 r = radeon_fence_emit(rdev, fence, ring->idx);
3614 if (r) {
3615 radeon_ring_unlock_undo(rdev, ring);
3616 return r;
3617 }
3618
3619 radeon_ring_unlock_commit(rdev, ring);
3620 radeon_semaphore_free(rdev, &sem, *fence);
3621
3622 return r;
3623}
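
/*
 * Worked example (illustration only): each LINEAR copy packet above moves
 * at most 0x1fffff bytes.  Copying, say, 2048 pages of 4 KiB
 * (0x800000 bytes) therefore takes DIV_ROUND_UP(0x800000, 0x1fffff) = 5
 * packets of 7 dwords each, plus up to 14 dwords of semaphore/fence
 * overhead -- exactly the "num_loops * 7 + 14" dwords reserved from the
 * ring.
 */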
3624
3625/**
3626 * cik_sdma_ring_test - simple async dma engine test
3627 *
3628 * @rdev: radeon_device pointer
3629 * @ring: radeon_ring structure holding ring information
3630 *
3631 * Test the DMA engine by using it to write a value to
3632 * memory (CIK).
3633 * Returns 0 for success, error for failure.
3634 */
3635int cik_sdma_ring_test(struct radeon_device *rdev,
3636 struct radeon_ring *ring)
3637{
3638 unsigned i;
3639 int r;
3640 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3641 u32 tmp;
3642
3643 if (!ptr) {
3644 DRM_ERROR("invalid vram scratch pointer\n");
3645 return -EINVAL;
3646 }
3647
3648 tmp = 0xCAFEDEAD;
3649 writel(tmp, ptr);
3650
3651 r = radeon_ring_lock(rdev, ring, 4);
3652 if (r) {
3653 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3654 return r;
3655 }
3656 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3657 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3658 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3659 radeon_ring_write(ring, 1); /* number of DWs to follow */
3660 radeon_ring_write(ring, 0xDEADBEEF);
3661 radeon_ring_unlock_commit(rdev, ring);
3662
3663 for (i = 0; i < rdev->usec_timeout; i++) {
3664 tmp = readl(ptr);
3665 if (tmp == 0xDEADBEEF)
3666 break;
3667 DRM_UDELAY(1);
3668 }
3669
3670 if (i < rdev->usec_timeout) {
3671 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3672 } else {
3673 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3674 ring->idx, tmp);
3675 r = -EINVAL;
3676 }
3677 return r;
3678}
3679
3680/**
3681 * cik_sdma_ib_test - test an IB on the DMA engine
3682 *
3683 * @rdev: radeon_device pointer
3684 * @ring: radeon_ring structure holding ring information
3685 *
3686 * Test a simple IB in the DMA ring (CIK).
3687 * Returns 0 on success, error on failure.
3688 */
3689int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3690{
3691 struct radeon_ib ib;
3692 unsigned i;
3693 int r;
3694 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3695 u32 tmp = 0;
3696
3697 if (!ptr) {
3698 DRM_ERROR("invalid vram scratch pointer\n");
3699 return -EINVAL;
3700 }
3701
3702 tmp = 0xCAFEDEAD;
3703 writel(tmp, ptr);
3704
3705 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3706 if (r) {
3707 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3708 return r;
3709 }
3710
3711 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3712 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3713 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3714 ib.ptr[3] = 1;
3715 ib.ptr[4] = 0xDEADBEEF;
3716 ib.length_dw = 5;
3717
3718 r = radeon_ib_schedule(rdev, &ib, NULL);
3719 if (r) {
3720 radeon_ib_free(rdev, &ib);
3721 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3722 return r;
3723 }
3724 r = radeon_fence_wait(ib.fence, false);
3725 if (r) {
3726 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3727 return r;
3728 }
3729 for (i = 0; i < rdev->usec_timeout; i++) {
3730 tmp = readl(ptr);
3731 if (tmp == 0xDEADBEEF)
3732 break;
3733 DRM_UDELAY(1);
3734 }
3735 if (i < rdev->usec_timeout) {
3736 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3737 } else {
3738 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3739 r = -EINVAL;
3740 }
3741 radeon_ib_free(rdev, &ib);
3742 return r;
3743}
3744
Alex Deuchercc066712013-04-09 12:59:51 -04003745
3746static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3747{
3748 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3749 RREG32(GRBM_STATUS));
3750 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3751 RREG32(GRBM_STATUS2));
3752 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3753 RREG32(GRBM_STATUS_SE0));
3754 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3755 RREG32(GRBM_STATUS_SE1));
3756 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3757 RREG32(GRBM_STATUS_SE2));
3758 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3759 RREG32(GRBM_STATUS_SE3));
3760 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3761 RREG32(SRBM_STATUS));
3762 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3763 RREG32(SRBM_STATUS2));
3764 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3765 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3766 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3767 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
Alex Deucher963e81f2013-06-26 17:37:11 -04003768 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3769 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3770 RREG32(CP_STALLED_STAT1));
3771 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3772 RREG32(CP_STALLED_STAT2));
3773 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3774 RREG32(CP_STALLED_STAT3));
3775 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3776 RREG32(CP_CPF_BUSY_STAT));
3777 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3778 RREG32(CP_CPF_STALLED_STAT1));
3779 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3780 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3781 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3782 RREG32(CP_CPC_STALLED_STAT1));
3783 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
Alex Deuchercc066712013-04-09 12:59:51 -04003784}
3785
Alex Deucher6f2043c2013-04-09 12:43:41 -04003786/**
Alex Deuchercc066712013-04-09 12:59:51 -04003787 * cik_gpu_check_soft_reset - check which blocks are busy
3788 *
3789 * @rdev: radeon_device pointer
3790 *
3791 * Check which blocks are busy and return the relevant reset
3792 * mask to be used by cik_gpu_soft_reset().
3793 * Returns a mask of the blocks to be reset.
3794 */
3795static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3796{
3797 u32 reset_mask = 0;
3798 u32 tmp;
3799
3800 /* GRBM_STATUS */
3801 tmp = RREG32(GRBM_STATUS);
3802 if (tmp & (PA_BUSY | SC_BUSY |
3803 BCI_BUSY | SX_BUSY |
3804 TA_BUSY | VGT_BUSY |
3805 DB_BUSY | CB_BUSY |
3806 GDS_BUSY | SPI_BUSY |
3807 IA_BUSY | IA_BUSY_NO_DMA))
3808 reset_mask |= RADEON_RESET_GFX;
3809
3810 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3811 reset_mask |= RADEON_RESET_CP;
3812
3813 /* GRBM_STATUS2 */
3814 tmp = RREG32(GRBM_STATUS2);
3815 if (tmp & RLC_BUSY)
3816 reset_mask |= RADEON_RESET_RLC;
3817
3818 /* SDMA0_STATUS_REG */
3819 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3820 if (!(tmp & SDMA_IDLE))
3821 reset_mask |= RADEON_RESET_DMA;
3822
3823 /* SDMA1_STATUS_REG */
3824 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3825 if (!(tmp & SDMA_IDLE))
3826 reset_mask |= RADEON_RESET_DMA1;
3827
3828 /* SRBM_STATUS2 */
3829 tmp = RREG32(SRBM_STATUS2);
3830 if (tmp & SDMA_BUSY)
3831 reset_mask |= RADEON_RESET_DMA;
3832
3833 if (tmp & SDMA1_BUSY)
3834 reset_mask |= RADEON_RESET_DMA1;
3835
3836 /* SRBM_STATUS */
3837 tmp = RREG32(SRBM_STATUS);
3838
3839 if (tmp & IH_BUSY)
3840 reset_mask |= RADEON_RESET_IH;
3841
3842 if (tmp & SEM_BUSY)
3843 reset_mask |= RADEON_RESET_SEM;
3844
3845 if (tmp & GRBM_RQ_PENDING)
3846 reset_mask |= RADEON_RESET_GRBM;
3847
3848 if (tmp & VMC_BUSY)
3849 reset_mask |= RADEON_RESET_VMC;
3850
3851 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3852 MCC_BUSY | MCD_BUSY))
3853 reset_mask |= RADEON_RESET_MC;
3854
3855 if (evergreen_is_display_hung(rdev))
3856 reset_mask |= RADEON_RESET_DISPLAY;
3857
3858 /* Skip MC reset as it's most likely not hung, just busy */
3859 if (reset_mask & RADEON_RESET_MC) {
3860 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3861 reset_mask &= ~RADEON_RESET_MC;
3862 }
3863
3864 return reset_mask;
3865}
3866
3867/**
3868 * cik_gpu_soft_reset - soft reset GPU
3869 *
3870 * @rdev: radeon_device pointer
3871 * @reset_mask: mask of which blocks to reset
3872 *
3873 * Soft reset the blocks specified in @reset_mask.
3874 */
3875static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3876{
3877 struct evergreen_mc_save save;
3878 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3879 u32 tmp;
3880
3881 if (reset_mask == 0)
3882 return;
3883
3884 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3885
3886 cik_print_gpu_status_regs(rdev);
3887 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3888 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3889 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3890 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3891
3892 /* stop the rlc */
3893 cik_rlc_stop(rdev);
3894
3895 /* Disable GFX parsing/prefetching */
3896 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3897
3898 /* Disable MEC parsing/prefetching */
3899 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3900
3901 if (reset_mask & RADEON_RESET_DMA) {
3902 /* sdma0 */
3903 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3904 tmp |= SDMA_HALT;
3905 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3906 }
3907 if (reset_mask & RADEON_RESET_DMA1) {
3908 /* sdma1 */
3909 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3910 tmp |= SDMA_HALT;
3911 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3912 }
3913
3914 evergreen_mc_stop(rdev, &save);
3915 if (evergreen_mc_wait_for_idle(rdev)) {
3916 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3917 }
3918
3919 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3920 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3921
3922 if (reset_mask & RADEON_RESET_CP) {
3923 grbm_soft_reset |= SOFT_RESET_CP;
3924
3925 srbm_soft_reset |= SOFT_RESET_GRBM;
3926 }
3927
3928 if (reset_mask & RADEON_RESET_DMA)
3929 srbm_soft_reset |= SOFT_RESET_SDMA;
3930
3931 if (reset_mask & RADEON_RESET_DMA1)
3932 srbm_soft_reset |= SOFT_RESET_SDMA1;
3933
3934 if (reset_mask & RADEON_RESET_DISPLAY)
3935 srbm_soft_reset |= SOFT_RESET_DC;
3936
3937 if (reset_mask & RADEON_RESET_RLC)
3938 grbm_soft_reset |= SOFT_RESET_RLC;
3939
3940 if (reset_mask & RADEON_RESET_SEM)
3941 srbm_soft_reset |= SOFT_RESET_SEM;
3942
3943 if (reset_mask & RADEON_RESET_IH)
3944 srbm_soft_reset |= SOFT_RESET_IH;
3945
3946 if (reset_mask & RADEON_RESET_GRBM)
3947 srbm_soft_reset |= SOFT_RESET_GRBM;
3948
3949 if (reset_mask & RADEON_RESET_VMC)
3950 srbm_soft_reset |= SOFT_RESET_VMC;
3951
3952 if (!(rdev->flags & RADEON_IS_IGP)) {
3953 if (reset_mask & RADEON_RESET_MC)
3954 srbm_soft_reset |= SOFT_RESET_MC;
3955 }
3956
3957 if (grbm_soft_reset) {
3958 tmp = RREG32(GRBM_SOFT_RESET);
3959 tmp |= grbm_soft_reset;
3960 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3961 WREG32(GRBM_SOFT_RESET, tmp);
3962 tmp = RREG32(GRBM_SOFT_RESET);
3963
3964 udelay(50);
3965
3966 tmp &= ~grbm_soft_reset;
3967 WREG32(GRBM_SOFT_RESET, tmp);
3968 tmp = RREG32(GRBM_SOFT_RESET);
3969 }
3970
3971 if (srbm_soft_reset) {
3972 tmp = RREG32(SRBM_SOFT_RESET);
3973 tmp |= srbm_soft_reset;
3974 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3975 WREG32(SRBM_SOFT_RESET, tmp);
3976 tmp = RREG32(SRBM_SOFT_RESET);
3977
3978 udelay(50);
3979
3980 tmp &= ~srbm_soft_reset;
3981 WREG32(SRBM_SOFT_RESET, tmp);
3982 tmp = RREG32(SRBM_SOFT_RESET);
3983 }
3984
3985 /* Wait a little for things to settle down */
3986 udelay(50);
3987
3988 evergreen_mc_resume(rdev, &save);
3989 udelay(50);
3990
3991 cik_print_gpu_status_regs(rdev);
3992}
3993
3994/**
3995 * cik_asic_reset - soft reset GPU
3996 *
3997 * @rdev: radeon_device pointer
3998 *
3999 * Look up which blocks are hung and attempt
4000 * to reset them.
4001 * Returns 0 for success.
4002 */
4003int cik_asic_reset(struct radeon_device *rdev)
4004{
4005 u32 reset_mask;
4006
4007 reset_mask = cik_gpu_check_soft_reset(rdev);
4008
4009 if (reset_mask)
4010 r600_set_bios_scratch_engine_hung(rdev, true);
4011
4012 cik_gpu_soft_reset(rdev, reset_mask);
4013
4014 reset_mask = cik_gpu_check_soft_reset(rdev);
4015
4016 if (!reset_mask)
4017 r600_set_bios_scratch_engine_hung(rdev, false);
4018
4019 return 0;
4020}
4021
4022/**
4023 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04004024 *
4025 * @rdev: radeon_device pointer
4026 * @ring: radeon_ring structure holding ring information
4027 *
4028 * Check if the 3D engine is locked up (CIK).
4029 * Returns true if the engine is locked, false if not.
4030 */
Alex Deuchercc066712013-04-09 12:59:51 -04004031bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04004032{
Alex Deuchercc066712013-04-09 12:59:51 -04004033 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04004034
Alex Deuchercc066712013-04-09 12:59:51 -04004035 if (!(reset_mask & (RADEON_RESET_GFX |
4036 RADEON_RESET_COMPUTE |
4037 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04004038 radeon_ring_lockup_update(ring);
4039 return false;
4040 }
4041 /* force CP activities */
4042 radeon_ring_force_activity(rdev, ring);
4043 return radeon_ring_test_lockup(rdev, ring);
4044}
4045
4046/**
Alex Deucher21a93e12013-04-09 12:47:11 -04004047 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4048 *
4049 * @rdev: radeon_device pointer
4050 * @ring: radeon_ring structure holding ring information
4051 *
4052 * Check if the async DMA engine is locked up (CIK).
4053 * Returns true if the engine appears to be locked up, false if not.
4054 */
4055bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4056{
Alex Deuchercc066712013-04-09 12:59:51 -04004057 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4058 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04004059
4060 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04004061 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04004062 else
Alex Deuchercc066712013-04-09 12:59:51 -04004063 mask = RADEON_RESET_DMA1;
4064
4065 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04004066 radeon_ring_lockup_update(ring);
4067 return false;
4068 }
4069 /* force ring activities */
4070 radeon_ring_force_activity(rdev, ring);
4071 return radeon_ring_test_lockup(rdev, ring);
4072}
4073
Alex Deucher1c491652013-04-09 12:45:26 -04004074/* MC */
4075/**
4076 * cik_mc_program - program the GPU memory controller
4077 *
4078 * @rdev: radeon_device pointer
4079 *
4080 * Set the location of vram, gart, and AGP in the GPU's
4081 * physical address space (CIK).
4082 */
4083static void cik_mc_program(struct radeon_device *rdev)
4084{
4085 struct evergreen_mc_save save;
4086 u32 tmp;
4087 int i, j;
4088
4089 /* Initialize HDP */
4090 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4091 WREG32((0x2c14 + j), 0x00000000);
4092 WREG32((0x2c18 + j), 0x00000000);
4093 WREG32((0x2c1c + j), 0x00000000);
4094 WREG32((0x2c20 + j), 0x00000000);
4095 WREG32((0x2c24 + j), 0x00000000);
4096 }
4097 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4098
4099 evergreen_mc_stop(rdev, &save);
4100 if (radeon_mc_wait_for_idle(rdev)) {
4101 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4102 }
4103 /* Lockout access through VGA aperture*/
4104 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4105 /* Update configuration */
4106 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4107 rdev->mc.vram_start >> 12);
4108 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4109 rdev->mc.vram_end >> 12);
4110 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4111 rdev->vram_scratch.gpu_addr >> 12);
4112 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4113 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4114 WREG32(MC_VM_FB_LOCATION, tmp);
4115 /* XXX double check these! */
4116 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4117 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4118 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4119 WREG32(MC_VM_AGP_BASE, 0);
4120 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4121 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4122 if (radeon_mc_wait_for_idle(rdev)) {
4123 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4124 }
4125 evergreen_mc_resume(rdev, &save);
4126 /* we need to own VRAM, so turn off the VGA renderer here
4127 * to stop it overwriting our objects */
4128 rv515_vga_render_disable(rdev);
4129}
4130
4131/**
4132 * cik_mc_init - initialize the memory controller driver params
4133 *
4134 * @rdev: radeon_device pointer
4135 *
4136 * Look up the amount of vram, vram width, and decide how to place
4137 * vram and gart within the GPU's physical address space (CIK).
4138 * Returns 0 for success.
4139 */
4140static int cik_mc_init(struct radeon_device *rdev)
4141{
4142 u32 tmp;
4143 int chansize, numchan;
4144
4145 /* Get VRAM information */
4146 rdev->mc.vram_is_ddr = true;
4147 tmp = RREG32(MC_ARB_RAMCFG);
4148 if (tmp & CHANSIZE_MASK) {
4149 chansize = 64;
4150 } else {
4151 chansize = 32;
4152 }
4153 tmp = RREG32(MC_SHARED_CHMAP);
4154 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4155 case 0:
4156 default:
4157 numchan = 1;
4158 break;
4159 case 1:
4160 numchan = 2;
4161 break;
4162 case 2:
4163 numchan = 4;
4164 break;
4165 case 3:
4166 numchan = 8;
4167 break;
4168 case 4:
4169 numchan = 3;
4170 break;
4171 case 5:
4172 numchan = 6;
4173 break;
4174 case 6:
4175 numchan = 10;
4176 break;
4177 case 7:
4178 numchan = 12;
4179 break;
4180 case 8:
4181 numchan = 16;
4182 break;
4183 }
4184 rdev->mc.vram_width = numchan * chansize;
4185 /* Could aper size report 0 ? */
4186 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4187 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4188 /* size in MB on CIK */
4189 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4190 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4191 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4192 si_vram_gtt_location(rdev, &rdev->mc);
4193 radeon_update_bandwidth_info(rdev);
4194
4195 return 0;
4196}
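
/*
 * Worked example (illustration only): vram_width is just channels times
 * channel width.  With MC_SHARED_CHMAP reporting the NOOFCHAN encoding 2
 * (4 channels) and MC_ARB_RAMCFG indicating 64-bit channels, the code
 * above reports a 4 * 64 = 256-bit memory interface.
 */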
4197
4198/*
4199 * GART
4200 * VMID 0 is the physical GPU addresses as used by the kernel.
4201 * VMIDs 1-15 are used for userspace clients and are handled
4202 * by the radeon vm/hsa code.
4203 */
4204/**
4205 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4206 *
4207 * @rdev: radeon_device pointer
4208 *
4209 * Flush the TLB for the VMID 0 page table (CIK).
4210 */
4211void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4212{
4213 /* flush hdp cache */
4214 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4215
4216 /* bits 0-15 are the VM contexts0-15 */
4217 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4218}
4219
4220/**
4221 * cik_pcie_gart_enable - gart enable
4222 *
4223 * @rdev: radeon_device pointer
4224 *
4225 * This sets up the TLBs, programs the page tables for VMID0,
4226 * sets up the hw for VMIDs 1-15 which are allocated on
4227 * demand, and sets up the global locations for the LDS, GDS,
4228 * and GPUVM for FSA64 clients (CIK).
4229 * Returns 0 for success, errors for failure.
4230 */
4231static int cik_pcie_gart_enable(struct radeon_device *rdev)
4232{
4233 int r, i;
4234
4235 if (rdev->gart.robj == NULL) {
4236 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4237 return -EINVAL;
4238 }
4239 r = radeon_gart_table_vram_pin(rdev);
4240 if (r)
4241 return r;
4242 radeon_gart_restore(rdev);
4243 /* Setup TLB control */
4244 WREG32(MC_VM_MX_L1_TLB_CNTL,
4245 (0xA << 7) |
4246 ENABLE_L1_TLB |
4247 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4248 ENABLE_ADVANCED_DRIVER_MODEL |
4249 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4250 /* Setup L2 cache */
4251 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4252 ENABLE_L2_FRAGMENT_PROCESSING |
4253 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4254 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4255 EFFECTIVE_L2_QUEUE_SIZE(7) |
4256 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4257 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4258 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4259 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4260 /* setup context0 */
4261 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4262 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4263 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4264 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4265 (u32)(rdev->dummy_page.addr >> 12));
4266 WREG32(VM_CONTEXT0_CNTL2, 0);
4267 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4268 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4269
4270 WREG32(0x15D4, 0);
4271 WREG32(0x15D8, 0);
4272 WREG32(0x15DC, 0);
4273
4274 /* empty context1-15 */
4275 /* FIXME start with 4G, once using 2 level pt switch to full
4276 * vm size space
4277 */
4278 /* set vm size, must be a multiple of 4 */
4279 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4280 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4281 for (i = 1; i < 16; i++) {
4282 if (i < 8)
4283 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4284 rdev->gart.table_addr >> 12);
4285 else
4286 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4287 rdev->gart.table_addr >> 12);
4288 }
4289
4290 /* enable context1-15 */
4291 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4292 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04004293 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04004294 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04004295 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4296 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4297 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4298 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4299 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4300 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4301 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4302 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4303 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4304 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4305 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4306 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04004307
4308 /* TC cache setup ??? */
4309 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4310 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4311 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4312
4313 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4314 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4315 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4316 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4317 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4318
4319 WREG32(TC_CFG_L1_VOLATILE, 0);
4320 WREG32(TC_CFG_L2_VOLATILE, 0);
4321
4322 if (rdev->family == CHIP_KAVERI) {
4323 u32 tmp = RREG32(CHUB_CONTROL);
4324 tmp &= ~BYPASS_VM;
4325 WREG32(CHUB_CONTROL, tmp);
4326 }
4327
4328 /* XXX SH_MEM regs */
4329 /* where to put LDS, scratch, GPUVM in FSA64 space */
4330 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05004331 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04004332 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04004333 WREG32(SH_MEM_CONFIG, 0);
4334 WREG32(SH_MEM_APE1_BASE, 1);
4335 WREG32(SH_MEM_APE1_LIMIT, 0);
4336 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04004337 /* SDMA GFX */
4338 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4339 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4340 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4341 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4342 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04004343 }
Alex Deucherb556b122013-01-29 10:44:22 -05004344 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucher1c491652013-04-09 12:45:26 -04004345
4346 cik_pcie_gart_tlb_flush(rdev);
4347 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4348 (unsigned)(rdev->mc.gtt_size >> 20),
4349 (unsigned long long)rdev->gart.table_addr);
4350 rdev->gart.ready = true;
4351 return 0;
4352}
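
/*
 * Illustration only (addresses are hypothetical): the context0 registers
 * above take page frame numbers rather than byte addresses.  A 1 GiB GTT
 * aperture placed at 0x40000000 (the actual placement comes from
 * si_vram_gtt_location() in cik_mc_init()) would be programmed as
 * START = 0x40000000 >> 12 = 0x40000 and END = 0x7fffffff >> 12 = 0x7ffff,
 * with the page table's VRAM address likewise shifted right by 12.
 */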
4353
4354/**
4355 * cik_pcie_gart_disable - gart disable
4356 *
4357 * @rdev: radeon_device pointer
4358 *
4359 * This disables all VM page tables (CIK).
4360 */
4361static void cik_pcie_gart_disable(struct radeon_device *rdev)
4362{
4363 /* Disable all tables */
4364 WREG32(VM_CONTEXT0_CNTL, 0);
4365 WREG32(VM_CONTEXT1_CNTL, 0);
4366 /* Setup TLB control */
4367 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4368 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4369 /* Setup L2 cache */
4370 WREG32(VM_L2_CNTL,
4371 ENABLE_L2_FRAGMENT_PROCESSING |
4372 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4373 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4374 EFFECTIVE_L2_QUEUE_SIZE(7) |
4375 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4376 WREG32(VM_L2_CNTL2, 0);
4377 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4378 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4379 radeon_gart_table_vram_unpin(rdev);
4380}
4381
4382/**
4383 * cik_pcie_gart_fini - vm fini callback
4384 *
4385 * @rdev: radeon_device pointer
4386 *
4387 * Tears down the driver GART/VM setup (CIK).
4388 */
4389static void cik_pcie_gart_fini(struct radeon_device *rdev)
4390{
4391 cik_pcie_gart_disable(rdev);
4392 radeon_gart_table_vram_free(rdev);
4393 radeon_gart_fini(rdev);
4394}
4395
4396/* vm parser */
4397/**
4398 * cik_ib_parse - vm ib_parse callback
4399 *
4400 * @rdev: radeon_device pointer
4401 * @ib: indirect buffer pointer
4402 *
4403 * CIK uses hw IB checking so this is a nop (CIK).
4404 */
4405int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4406{
4407 return 0;
4408}
4409
4410/*
4411 * vm
4412 * VMID 0 is the physical GPU addresses as used by the kernel.
4413 * VMIDs 1-15 are used for userspace clients and are handled
4414 * by the radeon vm/hsa code.
4415 */
4416/**
4417 * cik_vm_init - cik vm init callback
4418 *
4419 * @rdev: radeon_device pointer
4420 *
4421 * Inits cik specific vm parameters (number of VMs, base of vram for
4422 * VMIDs 1-15) (CIK).
4423 * Returns 0 for success.
4424 */
4425int cik_vm_init(struct radeon_device *rdev)
4426{
4427 /* number of VMs */
4428 rdev->vm_manager.nvm = 16;
4429 /* base offset of vram pages */
4430 if (rdev->flags & RADEON_IS_IGP) {
4431 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4432 tmp <<= 22;
4433 rdev->vm_manager.vram_base_offset = tmp;
4434 } else
4435 rdev->vm_manager.vram_base_offset = 0;
4436
4437 return 0;
4438}
4439
4440/**
4441 * cik_vm_fini - cik vm fini callback
4442 *
4443 * @rdev: radeon_device pointer
4444 *
4445 * Tear down any asic specific VM setup (CIK).
4446 */
4447void cik_vm_fini(struct radeon_device *rdev)
4448{
4449}
4450
Alex Deucherf96ab482012-08-31 10:37:47 -04004451/**
4452 * cik_vm_flush - cik vm flush using the CP
4453 *
4454 * @rdev: radeon_device pointer
4455 *
4456 * Update the page table base and flush the VM TLB
4457 * using the CP (CIK).
4458 */
4459void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4460{
4461 struct radeon_ring *ring = &rdev->ring[ridx];
4462
4463 if (vm == NULL)
4464 return;
4465
4466 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4467 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4468 WRITE_DATA_DST_SEL(0)));
4469 if (vm->id < 8) {
4470 radeon_ring_write(ring,
4471 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4472 } else {
4473 radeon_ring_write(ring,
4474 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4475 }
4476 radeon_ring_write(ring, 0);
4477 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4478
4479 /* update SH_MEM_* regs */
4480 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4481 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4482 WRITE_DATA_DST_SEL(0)));
4483 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4484 radeon_ring_write(ring, 0);
4485 radeon_ring_write(ring, VMID(vm->id));
4486
4487 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4488 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4489 WRITE_DATA_DST_SEL(0)));
4490 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4491 radeon_ring_write(ring, 0);
4492
4493 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4494 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4495 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4496 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4497
4498 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4499 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4500 WRITE_DATA_DST_SEL(0)));
4501 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4502 radeon_ring_write(ring, 0);
4503 radeon_ring_write(ring, VMID(0));
4504
4505 /* HDP flush */
4506 /* We should be using the WAIT_REG_MEM packet here like in
4507 * cik_fence_ring_emit(), but it causes the CP to hang in this
4508 * context...
4509 */
4510 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4511 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4512 WRITE_DATA_DST_SEL(0)));
4513 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4514 radeon_ring_write(ring, 0);
4515 radeon_ring_write(ring, 0);
4516
4517 /* bits 0-15 are the VM contexts0-15 */
4518 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4519 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4520 WRITE_DATA_DST_SEL(0)));
4521 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4522 radeon_ring_write(ring, 0);
4523 radeon_ring_write(ring, 1 << vm->id);
4524
Alex Deucherb07fdd32013-04-11 09:36:17 -04004525 /* compute doesn't have PFP */
4526 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4527 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4528 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4529 radeon_ring_write(ring, 0x0);
4530 }
Alex Deucherf96ab482012-08-31 10:37:47 -04004531}
4532
Alex Deucher605de6b2012-10-22 13:04:03 -04004533/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04004534 * cik_vm_set_page - update the page tables using CP or sDMA
4535 *
4536 * @rdev: radeon_device pointer
4537 * @ib: indirect buffer to fill with commands
4538 * @pe: addr of the page entry
4539 * @addr: dst addr to write into pe
4540 * @count: number of page entries to update
4541 * @incr: increase next addr by incr bytes
4542 * @flags: access flags
4543 *
4544 * Update the page tables using CP or sDMA (CIK).
4545 */
4546void cik_vm_set_page(struct radeon_device *rdev,
4547 struct radeon_ib *ib,
4548 uint64_t pe,
4549 uint64_t addr, unsigned count,
4550 uint32_t incr, uint32_t flags)
4551{
4552 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4553 uint64_t value;
4554 unsigned ndw;
4555
4556 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4557 /* CP */
4558 while (count) {
4559 ndw = 2 + count * 2;
4560 if (ndw > 0x3FFE)
4561 ndw = 0x3FFE;
4562
4563 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4564 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4565 WRITE_DATA_DST_SEL(1));
4566 ib->ptr[ib->length_dw++] = pe;
4567 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4568 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4569 if (flags & RADEON_VM_PAGE_SYSTEM) {
4570 value = radeon_vm_map_gart(rdev, addr);
4571 value &= 0xFFFFFFFFFFFFF000ULL;
4572 } else if (flags & RADEON_VM_PAGE_VALID) {
4573 value = addr;
4574 } else {
4575 value = 0;
4576 }
4577 addr += incr;
4578 value |= r600_flags;
4579 ib->ptr[ib->length_dw++] = value;
4580 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4581 }
4582 }
4583 } else {
4584 /* DMA */
4585 if (flags & RADEON_VM_PAGE_SYSTEM) {
4586 while (count) {
4587 ndw = count * 2;
4588 if (ndw > 0xFFFFE)
4589 ndw = 0xFFFFE;
4590
4591 /* for non-physically contiguous pages (system) */
4592 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4593 ib->ptr[ib->length_dw++] = pe;
4594 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4595 ib->ptr[ib->length_dw++] = ndw;
4596 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4597 if (flags & RADEON_VM_PAGE_SYSTEM) {
4598 value = radeon_vm_map_gart(rdev, addr);
4599 value &= 0xFFFFFFFFFFFFF000ULL;
4600 } else if (flags & RADEON_VM_PAGE_VALID) {
4601 value = addr;
4602 } else {
4603 value = 0;
4604 }
4605 addr += incr;
4606 value |= r600_flags;
4607 ib->ptr[ib->length_dw++] = value;
4608 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4609 }
4610 }
4611 } else {
4612 while (count) {
4613 ndw = count;
4614 if (ndw > 0x7FFFF)
4615 ndw = 0x7FFFF;
4616
4617 if (flags & RADEON_VM_PAGE_VALID)
4618 value = addr;
4619 else
4620 value = 0;
4621 /* for physically contiguous pages (vram) */
4622 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4623 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4624 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4625 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4626 ib->ptr[ib->length_dw++] = 0;
4627 ib->ptr[ib->length_dw++] = value; /* value */
4628 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4629 ib->ptr[ib->length_dw++] = incr; /* increment size */
4630 ib->ptr[ib->length_dw++] = 0;
4631 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4632 pe += ndw * 8;
4633 addr += ndw * incr;
4634 count -= ndw;
4635 }
4636 }
4637 while (ib->length_dw & 0x7)
4638 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4639 }
4640}
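
/*
 * Worked example (illustration only): on the CP path above, each
 * WRITE_DATA packet carries ndw = 2 + count * 2 payload dwords (the
 * destination address plus one 64-bit entry per page), capped at 0x3FFE,
 * i.e. at most 8190 PTEs per packet.  Updating 100 pages therefore costs
 * 1 header + 1 control + 202 payload = 204 ib dwords.
 */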
4641
4642/**
Alex Deucher605de6b2012-10-22 13:04:03 -04004643 * cik_dma_vm_flush - cik vm flush using sDMA
4644 *
4645 * @rdev: radeon_device pointer
4646 *
4647 * Update the page table base and flush the VM TLB
4648 * using sDMA (CIK).
4649 */
4650void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4651{
4652 struct radeon_ring *ring = &rdev->ring[ridx];
4653 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4654 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4655 u32 ref_and_mask;
4656
4657 if (vm == NULL)
4658 return;
4659
4660 if (ridx == R600_RING_TYPE_DMA_INDEX)
4661 ref_and_mask = SDMA0;
4662 else
4663 ref_and_mask = SDMA1;
4664
4665 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4666 if (vm->id < 8) {
4667 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4668 } else {
4669 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4670 }
4671 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4672
4673 /* update SH_MEM_* regs */
4674 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4675 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4676 radeon_ring_write(ring, VMID(vm->id));
4677
4678 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4679 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4680 radeon_ring_write(ring, 0);
4681
4682 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4683 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4684 radeon_ring_write(ring, 0);
4685
4686 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4687 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4688 radeon_ring_write(ring, 1);
4689
4690 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4691 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4692 radeon_ring_write(ring, 0);
4693
4694 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4696 radeon_ring_write(ring, VMID(0));
4697
4698 /* flush HDP */
4699 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4700 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4701 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4702 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4703 radeon_ring_write(ring, ref_and_mask); /* MASK */
4704 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4705
4706 /* flush TLB */
4707 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4708 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4709 radeon_ring_write(ring, 1 << vm->id);
4710}
4711
Alex Deucherf6796ca2012-11-09 10:44:08 -05004712/*
4713 * RLC
4714 * The RLC is a multi-purpose microengine that handles a
4715 * variety of functions, the most important of which is
4716 * the interrupt controller.
4717 */
4718/**
4719 * cik_rlc_stop - stop the RLC ME
4720 *
4721 * @rdev: radeon_device pointer
4722 *
4723 * Halt the RLC ME (MicroEngine) (CIK).
4724 */
4725static void cik_rlc_stop(struct radeon_device *rdev)
4726{
4727 int i, j, k;
4728 u32 mask, tmp;
4729
4730 tmp = RREG32(CP_INT_CNTL_RING0);
4731 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4732 WREG32(CP_INT_CNTL_RING0, tmp);
4733
4734 RREG32(CB_CGTT_SCLK_CTRL);
4735 RREG32(CB_CGTT_SCLK_CTRL);
4736 RREG32(CB_CGTT_SCLK_CTRL);
4737 RREG32(CB_CGTT_SCLK_CTRL);
4738
4739 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4740 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4741
4742 WREG32(RLC_CNTL, 0);
4743
4744 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4745 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4746 cik_select_se_sh(rdev, i, j);
4747 for (k = 0; k < rdev->usec_timeout; k++) {
4748 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4749 break;
4750 udelay(1);
4751 }
4752 }
4753 }
4754 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4755
4756 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4757 for (k = 0; k < rdev->usec_timeout; k++) {
4758 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4759 break;
4760 udelay(1);
4761 }
4762}
4763
4764/**
4765 * cik_rlc_start - start the RLC ME
4766 *
4767 * @rdev: radeon_device pointer
4768 *
4769 * Unhalt the RLC ME (MicroEngine) (CIK).
4770 */
4771static void cik_rlc_start(struct radeon_device *rdev)
4772{
4773 u32 tmp;
4774
4775 WREG32(RLC_CNTL, RLC_ENABLE);
4776
4777 tmp = RREG32(CP_INT_CNTL_RING0);
4778 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4779 WREG32(CP_INT_CNTL_RING0, tmp);
4780
4781 udelay(50);
4782}
4783
4784/**
4785 * cik_rlc_resume - setup the RLC hw
4786 *
4787 * @rdev: radeon_device pointer
4788 *
4789 * Initialize the RLC registers, load the ucode,
4790 * and start the RLC (CIK).
4791 * Returns 0 for success, -EINVAL if the ucode is not available.
4792 */
4793static int cik_rlc_resume(struct radeon_device *rdev)
4794{
4795 u32 i, size;
4796 u32 clear_state_info[3];
4797 const __be32 *fw_data;
4798
4799 if (!rdev->rlc_fw)
4800 return -EINVAL;
4801
4802 switch (rdev->family) {
4803 case CHIP_BONAIRE:
4804 default:
4805 size = BONAIRE_RLC_UCODE_SIZE;
4806 break;
4807 case CHIP_KAVERI:
4808 size = KV_RLC_UCODE_SIZE;
4809 break;
4810 case CHIP_KABINI:
4811 size = KB_RLC_UCODE_SIZE;
4812 break;
4813 }
4814
4815 cik_rlc_stop(rdev);
4816
4817 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4818 RREG32(GRBM_SOFT_RESET);
4819 udelay(50);
4820 WREG32(GRBM_SOFT_RESET, 0);
4821 RREG32(GRBM_SOFT_RESET);
4822 udelay(50);
4823
4824 WREG32(RLC_LB_CNTR_INIT, 0);
4825 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4826
4827 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4828 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4829 WREG32(RLC_LB_PARAMS, 0x00600408);
4830 WREG32(RLC_LB_CNTL, 0x80000004);
4831
4832 WREG32(RLC_MC_CNTL, 0);
4833 WREG32(RLC_UCODE_CNTL, 0);
4834
4835 fw_data = (const __be32 *)rdev->rlc_fw->data;
4836 WREG32(RLC_GPM_UCODE_ADDR, 0);
4837 for (i = 0; i < size; i++)
4838 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4839 WREG32(RLC_GPM_UCODE_ADDR, 0);
4840
4841 /* XXX */
4842 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4843 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4844 clear_state_info[2] = 0;//cik_default_size;
4845 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4846 for (i = 0; i < 3; i++)
4847 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4848 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4849
4850 cik_rlc_start(rdev);
4851
4852 return 0;
4853}
Alex Deuchera59781b2012-11-09 10:45:57 -05004854
4855/*
4856 * Interrupts
4857 * Starting with r6xx, interrupts are handled via a ring buffer.
4858 * Ring buffers are areas of GPU accessible memory that the GPU
4859 * writes interrupt vectors into and the host reads vectors out of.
4860 * There is a rptr (read pointer) that determines where the
4861 * host is currently reading, and a wptr (write pointer)
4862 * which determines where the GPU has written. When the
4863 * pointers are equal, the ring is idle. When the GPU
4864 * writes vectors to the ring buffer, it increments the
4865 * wptr. When there is an interrupt, the host then starts
4866 * fetching commands and processing them until the pointers are
4867 * equal again at which point it updates the rptr.
4868 */
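
/*
 * Illustration only, not driver code: a minimal sketch of the rptr/wptr
 * protocol described above.  'wptr' is where the GPU last wrote, 'rptr'
 * is where the host has read up to, and the ring is idle when they are
 * equal.  The helper name and the dispatch callback are hypothetical, and
 * each entry is shown as a single dword for brevity (the real IH ring
 * uses larger descriptors and byte-based pointers).
 */
static inline u32 ih_example_drain(const u32 *ring, u32 ptr_mask,
				   u32 rptr, u32 wptr,
				   void (*dispatch)(u32 vector))
{
	while (rptr != wptr) {
		dispatch(ring[rptr]);		/* handle one vector */
		rptr = (rptr + 1) & ptr_mask;	/* consume the entry */
	}
	return rptr;	/* caller writes this back so the GPU sees free space */
}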
4869
4870/**
4871 * cik_enable_interrupts - Enable the interrupt ring buffer
4872 *
4873 * @rdev: radeon_device pointer
4874 *
4875 * Enable the interrupt ring buffer (CIK).
4876 */
4877static void cik_enable_interrupts(struct radeon_device *rdev)
4878{
4879 u32 ih_cntl = RREG32(IH_CNTL);
4880 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4881
4882 ih_cntl |= ENABLE_INTR;
4883 ih_rb_cntl |= IH_RB_ENABLE;
4884 WREG32(IH_CNTL, ih_cntl);
4885 WREG32(IH_RB_CNTL, ih_rb_cntl);
4886 rdev->ih.enabled = true;
4887}
4888
4889/**
4890 * cik_disable_interrupts - Disable the interrupt ring buffer
4891 *
4892 * @rdev: radeon_device pointer
4893 *
4894 * Disable the interrupt ring buffer (CIK).
4895 */
4896static void cik_disable_interrupts(struct radeon_device *rdev)
4897{
4898 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4899 u32 ih_cntl = RREG32(IH_CNTL);
4900
4901 ih_rb_cntl &= ~IH_RB_ENABLE;
4902 ih_cntl &= ~ENABLE_INTR;
4903 WREG32(IH_RB_CNTL, ih_rb_cntl);
4904 WREG32(IH_CNTL, ih_cntl);
4905 /* set rptr, wptr to 0 */
4906 WREG32(IH_RB_RPTR, 0);
4907 WREG32(IH_RB_WPTR, 0);
4908 rdev->ih.enabled = false;
4909 rdev->ih.rptr = 0;
4910}
4911
4912/**
4913 * cik_disable_interrupt_state - Disable all interrupt sources
4914 *
4915 * @rdev: radeon_device pointer
4916 *
4917 * Clear all interrupt enable bits used by the driver (CIK).
4918 */
4919static void cik_disable_interrupt_state(struct radeon_device *rdev)
4920{
4921 u32 tmp;
4922
4923 /* gfx ring */
4924 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04004925 /* sdma */
4926 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4927 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4928 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4929 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05004930 /* compute queues */
4931 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4932 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4933 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4934 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4935 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4936 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4937 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4938 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4939 /* grbm */
4940 WREG32(GRBM_INT_CNTL, 0);
4941 /* vline/vblank, etc. */
4942 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4943 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4944 if (rdev->num_crtc >= 4) {
4945 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4946 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4947 }
4948 if (rdev->num_crtc >= 6) {
4949 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4950 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4951 }
4952
4953 /* dac hotplug */
4954 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4955
4956 /* digital hotplug */
4957 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4958 WREG32(DC_HPD1_INT_CONTROL, tmp);
4959 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4960 WREG32(DC_HPD2_INT_CONTROL, tmp);
4961 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4962 WREG32(DC_HPD3_INT_CONTROL, tmp);
4963 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4964 WREG32(DC_HPD4_INT_CONTROL, tmp);
4965 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4966 WREG32(DC_HPD5_INT_CONTROL, tmp);
4967 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4968 WREG32(DC_HPD6_INT_CONTROL, tmp);
4969
4970}
4971
4972/**
4973 * cik_irq_init - init and enable the interrupt ring
4974 *
4975 * @rdev: radeon_device pointer
4976 *
4977 * Allocate a ring buffer for the interrupt controller,
4978 * enable the RLC, disable interrupts, set up the IH
4979 * ring buffer and enable it (CIK).
4980 * Called at device load and resume.
4981 * Returns 0 for success, errors for failure.
4982 */
4983static int cik_irq_init(struct radeon_device *rdev)
4984{
4985 int ret = 0;
4986 int rb_bufsz;
4987 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4988
4989 /* allocate ring */
4990 ret = r600_ih_ring_alloc(rdev);
4991 if (ret)
4992 return ret;
4993
4994 /* disable irqs */
4995 cik_disable_interrupts(rdev);
4996
4997 /* init rlc */
4998 ret = cik_rlc_resume(rdev);
4999 if (ret) {
5000 r600_ih_ring_fini(rdev);
5001 return ret;
5002 }
5003
5004 /* setup interrupt control */
5005 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5006 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5007 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5008 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5009 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5010 */
5011 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5012 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5013 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5014 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5015
5016 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5017 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5018
5019 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5020 IH_WPTR_OVERFLOW_CLEAR |
5021 (rb_bufsz << 1));
5022
5023 if (rdev->wb.enabled)
5024 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5025
5026 /* set the writeback address whether it's enabled or not */
5027 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5028 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5029
5030 WREG32(IH_RB_CNTL, ih_rb_cntl);
5031
5032 /* set rptr, wptr to 0 */
5033 WREG32(IH_RB_RPTR, 0);
5034 WREG32(IH_RB_WPTR, 0);
5035
5036 /* Default settings for IH_CNTL (disabled at first) */
5037 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5038 /* RPTR_REARM only works if msi's are enabled */
5039 if (rdev->msi_enabled)
5040 ih_cntl |= RPTR_REARM;
5041 WREG32(IH_CNTL, ih_cntl);
5042
5043 /* force the active interrupt state to all disabled */
5044 cik_disable_interrupt_state(rdev);
5045
5046 pci_set_master(rdev->pdev);
5047
5048 /* enable irqs */
5049 cik_enable_interrupts(rdev);
5050
5051 return ret;
5052}
5053
5054/**
5055 * cik_irq_set - enable/disable interrupt sources
5056 *
5057 * @rdev: radeon_device pointer
5058 *
5059 * Enable interrupt sources on the GPU (vblanks, hpd,
5060 * etc.) (CIK).
5061 * Returns 0 for success, errors for failure.
5062 */
5063int cik_irq_set(struct radeon_device *rdev)
5064{
5065 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5066 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04005067 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5068 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05005069 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5070 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5071 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04005072 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05005073
5074 if (!rdev->irq.installed) {
5075 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5076 return -EINVAL;
5077 }
5078 /* don't enable anything if the ih is disabled */
5079 if (!rdev->ih.enabled) {
5080 cik_disable_interrupts(rdev);
5081 /* force the active interrupt state to all disabled */
5082 cik_disable_interrupt_state(rdev);
5083 return 0;
5084 }
5085
5086 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5087 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5088 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5089 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5090 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5091 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5092
Alex Deucher21a93e12013-04-09 12:47:11 -04005093 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5094 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5095
Alex Deucher2b0781a2013-04-09 14:26:16 -04005096 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5097 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5098 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5099 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5100 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5101 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5102 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5103 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5104
Alex Deuchera59781b2012-11-09 10:45:57 -05005105 /* enable CP interrupts on all rings */
5106 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5107 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5108 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5109 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04005110 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5111 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5112 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5113 if (ring->me == 1) {
5114 switch (ring->pipe) {
5115 case 0:
5116 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5117 break;
5118 case 1:
5119 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5120 break;
5121 case 2:
5122 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5123 break;
5124 case 3:
5125 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5126 				break;
5127 			default:
5128 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5129 break;
5130 }
5131 } else if (ring->me == 2) {
5132 switch (ring->pipe) {
5133 case 0:
5134 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5135 break;
5136 case 1:
5137 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5138 break;
5139 case 2:
5140 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5141 break;
5142 case 3:
5143 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5144 				break;
5145 			default:
5146 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5147 break;
5148 }
5149 } else {
5150 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5151 }
5152 }
5153 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5154 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5155 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
5156 if (ring->me == 1) {
5157 switch (ring->pipe) {
5158 case 0:
5159 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5160 break;
5161 case 1:
5162 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5163 break;
5164 case 2:
5165 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5166 break;
5167 case 3:
5168 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5169 				break;
5170 			default:
5171 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5172 break;
5173 }
5174 } else if (ring->me == 2) {
5175 switch (ring->pipe) {
5176 case 0:
5177 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5178 break;
5179 case 1:
5180 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5181 break;
5182 case 2:
5183 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5184 break;
5185 case 3:
5186 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5187 				break;
5188 			default:
5189 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5190 break;
5191 }
5192 } else {
5193 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5194 }
5195 }
Alex Deuchera59781b2012-11-09 10:45:57 -05005196
Alex Deucher21a93e12013-04-09 12:47:11 -04005197 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5198 DRM_DEBUG("cik_irq_set: sw int dma\n");
5199 dma_cntl |= TRAP_ENABLE;
5200 }
5201
5202 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5203 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5204 dma_cntl1 |= TRAP_ENABLE;
5205 }
5206
Alex Deuchera59781b2012-11-09 10:45:57 -05005207 if (rdev->irq.crtc_vblank_int[0] ||
5208 atomic_read(&rdev->irq.pflip[0])) {
5209 DRM_DEBUG("cik_irq_set: vblank 0\n");
5210 crtc1 |= VBLANK_INTERRUPT_MASK;
5211 }
5212 if (rdev->irq.crtc_vblank_int[1] ||
5213 atomic_read(&rdev->irq.pflip[1])) {
5214 DRM_DEBUG("cik_irq_set: vblank 1\n");
5215 crtc2 |= VBLANK_INTERRUPT_MASK;
5216 }
5217 if (rdev->irq.crtc_vblank_int[2] ||
5218 atomic_read(&rdev->irq.pflip[2])) {
5219 DRM_DEBUG("cik_irq_set: vblank 2\n");
5220 crtc3 |= VBLANK_INTERRUPT_MASK;
5221 }
5222 if (rdev->irq.crtc_vblank_int[3] ||
5223 atomic_read(&rdev->irq.pflip[3])) {
5224 DRM_DEBUG("cik_irq_set: vblank 3\n");
5225 crtc4 |= VBLANK_INTERRUPT_MASK;
5226 }
5227 if (rdev->irq.crtc_vblank_int[4] ||
5228 atomic_read(&rdev->irq.pflip[4])) {
5229 DRM_DEBUG("cik_irq_set: vblank 4\n");
5230 crtc5 |= VBLANK_INTERRUPT_MASK;
5231 }
5232 if (rdev->irq.crtc_vblank_int[5] ||
5233 atomic_read(&rdev->irq.pflip[5])) {
5234 DRM_DEBUG("cik_irq_set: vblank 5\n");
5235 crtc6 |= VBLANK_INTERRUPT_MASK;
5236 }
5237 if (rdev->irq.hpd[0]) {
5238 DRM_DEBUG("cik_irq_set: hpd 1\n");
5239 hpd1 |= DC_HPDx_INT_EN;
5240 }
5241 if (rdev->irq.hpd[1]) {
5242 DRM_DEBUG("cik_irq_set: hpd 2\n");
5243 hpd2 |= DC_HPDx_INT_EN;
5244 }
5245 if (rdev->irq.hpd[2]) {
5246 DRM_DEBUG("cik_irq_set: hpd 3\n");
5247 hpd3 |= DC_HPDx_INT_EN;
5248 }
5249 if (rdev->irq.hpd[3]) {
5250 DRM_DEBUG("cik_irq_set: hpd 4\n");
5251 hpd4 |= DC_HPDx_INT_EN;
5252 }
5253 if (rdev->irq.hpd[4]) {
5254 DRM_DEBUG("cik_irq_set: hpd 5\n");
5255 hpd5 |= DC_HPDx_INT_EN;
5256 }
5257 if (rdev->irq.hpd[5]) {
5258 DRM_DEBUG("cik_irq_set: hpd 6\n");
5259 hpd6 |= DC_HPDx_INT_EN;
5260 }
5261
5262 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5263
Alex Deucher21a93e12013-04-09 12:47:11 -04005264 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5265 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5266
Alex Deucher2b0781a2013-04-09 14:26:16 -04005267 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5268 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5269 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5270 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5271 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5272 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5273 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5274 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5275
Alex Deuchera59781b2012-11-09 10:45:57 -05005276 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5277
5278 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5279 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5280 if (rdev->num_crtc >= 4) {
5281 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5282 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5283 }
5284 if (rdev->num_crtc >= 6) {
5285 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5286 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5287 }
5288
5289 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5290 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5291 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5292 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5293 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5294 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5295
5296 return 0;
5297}
5298
5299/**
5300 * cik_irq_ack - ack interrupt sources
5301 *
5302 * @rdev: radeon_device pointer
5303 *
5304 * Ack interrupt sources on the GPU (vblanks, hpd,
5305 * etc.) (CIK).  Certain interrupt sources are sw
5306 * generated and do not require an explicit ack.
5307 */
5308static inline void cik_irq_ack(struct radeon_device *rdev)
5309{
5310 u32 tmp;
5311
5312 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5313 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5314 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5315 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5316 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5317 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5318 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5319
5320 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5321 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5322 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5323 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5324 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5325 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5326 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5327 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5328
5329 if (rdev->num_crtc >= 4) {
5330 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5331 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5332 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5333 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5334 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5335 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5336 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5337 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5338 }
5339
5340 if (rdev->num_crtc >= 6) {
5341 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5342 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5343 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5344 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5345 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5346 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5347 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5348 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5349 }
5350
5351 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5352 tmp = RREG32(DC_HPD1_INT_CONTROL);
5353 tmp |= DC_HPDx_INT_ACK;
5354 WREG32(DC_HPD1_INT_CONTROL, tmp);
5355 }
5356 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5357 tmp = RREG32(DC_HPD2_INT_CONTROL);
5358 tmp |= DC_HPDx_INT_ACK;
5359 WREG32(DC_HPD2_INT_CONTROL, tmp);
5360 }
5361 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5362 tmp = RREG32(DC_HPD3_INT_CONTROL);
5363 tmp |= DC_HPDx_INT_ACK;
5364 WREG32(DC_HPD3_INT_CONTROL, tmp);
5365 }
5366 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5367 tmp = RREG32(DC_HPD4_INT_CONTROL);
5368 tmp |= DC_HPDx_INT_ACK;
5369 WREG32(DC_HPD4_INT_CONTROL, tmp);
5370 }
5371 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5372 tmp = RREG32(DC_HPD5_INT_CONTROL);
5373 tmp |= DC_HPDx_INT_ACK;
5374 WREG32(DC_HPD5_INT_CONTROL, tmp);
5375 }
5376 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5377 		tmp = RREG32(DC_HPD6_INT_CONTROL);
5378 tmp |= DC_HPDx_INT_ACK;
5379 WREG32(DC_HPD6_INT_CONTROL, tmp);
5380 }
5381}
5382
5383/**
5384 * cik_irq_disable - disable interrupts
5385 *
5386 * @rdev: radeon_device pointer
5387 *
5388 * Disable interrupts on the hw (CIK).
5389 */
5390static void cik_irq_disable(struct radeon_device *rdev)
5391{
5392 cik_disable_interrupts(rdev);
5393 /* Wait and acknowledge irq */
5394 mdelay(1);
5395 cik_irq_ack(rdev);
5396 cik_disable_interrupt_state(rdev);
5397}
5398
5399/**
5400 * cik_irq_suspend - disable interrupts for suspend
5401 *
5402 * @rdev: radeon_device pointer
5403 *
5404 * Disable interrupts and stop the RLC (CIK).
5405 * Used for suspend.
5406 */
5407static void cik_irq_suspend(struct radeon_device *rdev)
5408{
5409 cik_irq_disable(rdev);
5410 cik_rlc_stop(rdev);
5411}
5412
5413/**
5414 * cik_irq_fini - tear down interrupt support
5415 *
5416 * @rdev: radeon_device pointer
5417 *
5418 * Disable interrupts on the hw and free the IH ring
5419 * buffer (CIK).
5420 * Used for driver unload.
5421 */
5422static void cik_irq_fini(struct radeon_device *rdev)
5423{
5424 cik_irq_suspend(rdev);
5425 r600_ih_ring_fini(rdev);
5426}
5427
5428/**
5429 * cik_get_ih_wptr - get the IH ring buffer wptr
5430 *
5431 * @rdev: radeon_device pointer
5432 *
5433 * Get the IH ring buffer wptr from either the register
5434 * or the writeback memory buffer (CIK). Also check for
5435 * ring buffer overflow and deal with it.
5436 * Used by cik_irq_process().
5437 * Returns the value of the wptr.
5438 */
5439static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5440{
5441 u32 wptr, tmp;
5442
5443 if (rdev->wb.enabled)
5444 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5445 else
5446 wptr = RREG32(IH_RB_WPTR);
5447
5448 if (wptr & RB_OVERFLOW) {
5449 		/* When a ring buffer overflow happens, start parsing interrupts
5450 		 * from the last vector that has not been overwritten (wptr + 16).
5451 		 * Hopefully this allows us to catch up.
5452 */
5453 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5454 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5455 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5456 tmp = RREG32(IH_RB_CNTL);
5457 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5458 WREG32(IH_RB_CNTL, tmp);
5459 }
5460 return (wptr & rdev->ih.ptr_mask);
5461}
5462
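/*
 * Worked example of the overflow handling above (illustrative only,
 * values assumed): with a 64KB IH ring, ptr_mask is 0xffff.  If the wptr
 * register reads back 0xfff0 with RB_OVERFLOW set, processing resumes at
 * rptr = (0xfff0 + 16) & 0xffff = 0x0000, i.e. the oldest vector that has
 * not yet been overwritten.
 */
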
5463/* CIK IV Ring
5464 * Each IV ring entry is 128 bits:
5465 * [7:0] - interrupt source id
5466 * [31:8] - reserved
5467 * [59:32] - interrupt source data
5468 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04005469 * [71:64] - RINGID
5470 * CP:
5471 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05005472 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5473 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5474 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5475 * PIPE_ID - ME0 0=3D
5476 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04005477 * SDMA:
5478 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5479 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5480 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05005481 * [79:72] - VMID
5482 * [95:80] - PASID
5483 * [127:96] - reserved
5484 */
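/*
 * Illustrative decode of the RINGID byte for a CP source (not driver
 * code; the same masks appear in cik_irq_process() below):
 *	me_id    = (ring_id & 0x60) >> 5;	ME_ID,    bits [6:5]
 *	pipe_id  = (ring_id & 0x18) >> 3;	PIPE_ID,  bits [4:3]
 *	queue_id = (ring_id & 0x07) >> 0;	QUEUE_ID, bits [2:0]
 * For SDMA sources the byte is instead split as INSTANCE_ID in bits [1:0]
 * and QUEUE_ID in bits [3:2].
 */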
5485/**
5486 * cik_irq_process - interrupt handler
5487 *
5488 * @rdev: radeon_device pointer
5489 *
5490 * Interrupt handler (CIK).  Walk the IH ring,
5491 * ack interrupts and schedule work to handle
5492 * interrupt events.
5493 * Returns irq process return code.
5494 */
5495int cik_irq_process(struct radeon_device *rdev)
5496{
Alex Deucher2b0781a2013-04-09 14:26:16 -04005497 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5498 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05005499 u32 wptr;
5500 u32 rptr;
5501 u32 src_id, src_data, ring_id;
5502 u8 me_id, pipe_id, queue_id;
5503 u32 ring_index;
5504 bool queue_hotplug = false;
5505 bool queue_reset = false;
5506
5507 if (!rdev->ih.enabled || rdev->shutdown)
5508 return IRQ_NONE;
5509
5510 wptr = cik_get_ih_wptr(rdev);
5511
5512restart_ih:
5513 /* is somebody else already processing irqs? */
5514 if (atomic_xchg(&rdev->ih.lock, 1))
5515 return IRQ_NONE;
5516
5517 rptr = rdev->ih.rptr;
5518 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5519
5520 /* Order reading of wptr vs. reading of IH ring data */
5521 rmb();
5522
5523 /* display interrupts */
5524 cik_irq_ack(rdev);
5525
5526 while (rptr != wptr) {
5527 /* wptr/rptr are in bytes! */
5528 ring_index = rptr / 4;
5529 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5530 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5531 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05005532
5533 switch (src_id) {
5534 case 1: /* D1 vblank/vline */
5535 switch (src_data) {
5536 case 0: /* D1 vblank */
5537 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5538 if (rdev->irq.crtc_vblank_int[0]) {
5539 drm_handle_vblank(rdev->ddev, 0);
5540 rdev->pm.vblank_sync = true;
5541 wake_up(&rdev->irq.vblank_queue);
5542 }
5543 if (atomic_read(&rdev->irq.pflip[0]))
5544 radeon_crtc_handle_flip(rdev, 0);
5545 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5546 DRM_DEBUG("IH: D1 vblank\n");
5547 }
5548 break;
5549 case 1: /* D1 vline */
5550 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5551 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5552 DRM_DEBUG("IH: D1 vline\n");
5553 }
5554 break;
5555 default:
5556 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5557 break;
5558 }
5559 break;
5560 case 2: /* D2 vblank/vline */
5561 switch (src_data) {
5562 case 0: /* D2 vblank */
5563 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5564 if (rdev->irq.crtc_vblank_int[1]) {
5565 drm_handle_vblank(rdev->ddev, 1);
5566 rdev->pm.vblank_sync = true;
5567 wake_up(&rdev->irq.vblank_queue);
5568 }
5569 if (atomic_read(&rdev->irq.pflip[1]))
5570 radeon_crtc_handle_flip(rdev, 1);
5571 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5572 DRM_DEBUG("IH: D2 vblank\n");
5573 }
5574 break;
5575 case 1: /* D2 vline */
5576 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5577 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5578 DRM_DEBUG("IH: D2 vline\n");
5579 }
5580 break;
5581 default:
5582 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5583 break;
5584 }
5585 break;
5586 case 3: /* D3 vblank/vline */
5587 switch (src_data) {
5588 case 0: /* D3 vblank */
5589 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5590 if (rdev->irq.crtc_vblank_int[2]) {
5591 drm_handle_vblank(rdev->ddev, 2);
5592 rdev->pm.vblank_sync = true;
5593 wake_up(&rdev->irq.vblank_queue);
5594 }
5595 if (atomic_read(&rdev->irq.pflip[2]))
5596 radeon_crtc_handle_flip(rdev, 2);
5597 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5598 DRM_DEBUG("IH: D3 vblank\n");
5599 }
5600 break;
5601 case 1: /* D3 vline */
5602 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5603 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5604 DRM_DEBUG("IH: D3 vline\n");
5605 }
5606 break;
5607 default:
5608 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5609 break;
5610 }
5611 break;
5612 case 4: /* D4 vblank/vline */
5613 switch (src_data) {
5614 case 0: /* D4 vblank */
5615 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5616 if (rdev->irq.crtc_vblank_int[3]) {
5617 drm_handle_vblank(rdev->ddev, 3);
5618 rdev->pm.vblank_sync = true;
5619 wake_up(&rdev->irq.vblank_queue);
5620 }
5621 if (atomic_read(&rdev->irq.pflip[3]))
5622 radeon_crtc_handle_flip(rdev, 3);
5623 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5624 DRM_DEBUG("IH: D4 vblank\n");
5625 }
5626 break;
5627 case 1: /* D4 vline */
5628 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5629 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5630 DRM_DEBUG("IH: D4 vline\n");
5631 }
5632 break;
5633 default:
5634 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5635 break;
5636 }
5637 break;
5638 case 5: /* D5 vblank/vline */
5639 switch (src_data) {
5640 case 0: /* D5 vblank */
5641 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5642 if (rdev->irq.crtc_vblank_int[4]) {
5643 drm_handle_vblank(rdev->ddev, 4);
5644 rdev->pm.vblank_sync = true;
5645 wake_up(&rdev->irq.vblank_queue);
5646 }
5647 if (atomic_read(&rdev->irq.pflip[4]))
5648 radeon_crtc_handle_flip(rdev, 4);
5649 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5650 DRM_DEBUG("IH: D5 vblank\n");
5651 }
5652 break;
5653 case 1: /* D5 vline */
5654 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5655 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5656 DRM_DEBUG("IH: D5 vline\n");
5657 }
5658 break;
5659 default:
5660 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5661 break;
5662 }
5663 break;
5664 case 6: /* D6 vblank/vline */
5665 switch (src_data) {
5666 case 0: /* D6 vblank */
5667 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5668 if (rdev->irq.crtc_vblank_int[5]) {
5669 drm_handle_vblank(rdev->ddev, 5);
5670 rdev->pm.vblank_sync = true;
5671 wake_up(&rdev->irq.vblank_queue);
5672 }
5673 if (atomic_read(&rdev->irq.pflip[5]))
5674 radeon_crtc_handle_flip(rdev, 5);
5675 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5676 DRM_DEBUG("IH: D6 vblank\n");
5677 }
5678 break;
5679 case 1: /* D6 vline */
5680 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5681 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5682 DRM_DEBUG("IH: D6 vline\n");
5683 }
5684 break;
5685 default:
5686 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5687 break;
5688 }
5689 break;
5690 case 42: /* HPD hotplug */
5691 switch (src_data) {
5692 case 0:
5693 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5694 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5695 queue_hotplug = true;
5696 DRM_DEBUG("IH: HPD1\n");
5697 }
5698 break;
5699 case 1:
5700 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5701 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5702 queue_hotplug = true;
5703 DRM_DEBUG("IH: HPD2\n");
5704 }
5705 break;
5706 case 2:
5707 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5708 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5709 queue_hotplug = true;
5710 DRM_DEBUG("IH: HPD3\n");
5711 }
5712 break;
5713 case 3:
5714 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5715 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5716 queue_hotplug = true;
5717 DRM_DEBUG("IH: HPD4\n");
5718 }
5719 break;
5720 case 4:
5721 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5722 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5723 queue_hotplug = true;
5724 DRM_DEBUG("IH: HPD5\n");
5725 }
5726 break;
5727 case 5:
5728 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5729 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5730 queue_hotplug = true;
5731 DRM_DEBUG("IH: HPD6\n");
5732 }
5733 break;
5734 default:
5735 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5736 break;
5737 }
5738 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04005739 case 146:
5740 case 147:
5741 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5742 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5743 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5744 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5745 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5746 /* reset addr and status */
5747 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5748 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005749 case 176: /* GFX RB CP_INT */
5750 case 177: /* GFX IB CP_INT */
5751 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5752 break;
5753 case 181: /* CP EOP event */
5754 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005755 /* XXX check the bitfield order! */
5756 me_id = (ring_id & 0x60) >> 5;
5757 pipe_id = (ring_id & 0x18) >> 3;
5758 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005759 switch (me_id) {
5760 case 0:
5761 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5762 break;
5763 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05005764 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04005765 			if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5766 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5767 			if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5768 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05005769 break;
5770 }
5771 break;
5772 case 184: /* CP Privileged reg access */
5773 DRM_ERROR("Illegal register access in command stream\n");
5774 /* XXX check the bitfield order! */
5775 me_id = (ring_id & 0x60) >> 5;
5776 pipe_id = (ring_id & 0x18) >> 3;
5777 queue_id = (ring_id & 0x7) >> 0;
5778 switch (me_id) {
5779 case 0:
5780 /* This results in a full GPU reset, but all we need to do is soft
5781 * reset the CP for gfx
5782 */
5783 queue_reset = true;
5784 break;
5785 case 1:
5786 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005787 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005788 break;
5789 case 2:
5790 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005791 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005792 break;
5793 }
5794 break;
5795 case 185: /* CP Privileged inst */
5796 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005797 /* XXX check the bitfield order! */
5798 me_id = (ring_id & 0x60) >> 5;
5799 pipe_id = (ring_id & 0x18) >> 3;
5800 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005801 switch (me_id) {
5802 case 0:
5803 /* This results in a full GPU reset, but all we need to do is soft
5804 * reset the CP for gfx
5805 */
5806 queue_reset = true;
5807 break;
5808 case 1:
5809 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005810 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005811 break;
5812 case 2:
5813 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005814 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005815 break;
5816 }
5817 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04005818 case 224: /* SDMA trap event */
5819 /* XXX check the bitfield order! */
5820 me_id = (ring_id & 0x3) >> 0;
5821 queue_id = (ring_id & 0xc) >> 2;
5822 DRM_DEBUG("IH: SDMA trap\n");
5823 switch (me_id) {
5824 case 0:
5825 switch (queue_id) {
5826 case 0:
5827 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5828 break;
5829 case 1:
5830 /* XXX compute */
5831 break;
5832 case 2:
5833 /* XXX compute */
5834 break;
5835 }
5836 break;
5837 case 1:
5838 switch (queue_id) {
5839 case 0:
5840 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5841 break;
5842 case 1:
5843 /* XXX compute */
5844 break;
5845 case 2:
5846 /* XXX compute */
5847 break;
5848 }
5849 break;
5850 }
5851 break;
5852 case 241: /* SDMA Privileged inst */
5853 case 247: /* SDMA Privileged inst */
5854 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5855 /* XXX check the bitfield order! */
5856 me_id = (ring_id & 0x3) >> 0;
5857 queue_id = (ring_id & 0xc) >> 2;
5858 switch (me_id) {
5859 case 0:
5860 switch (queue_id) {
5861 case 0:
5862 queue_reset = true;
5863 break;
5864 case 1:
5865 /* XXX compute */
5866 queue_reset = true;
5867 break;
5868 case 2:
5869 /* XXX compute */
5870 queue_reset = true;
5871 break;
5872 }
5873 break;
5874 case 1:
5875 switch (queue_id) {
5876 case 0:
5877 queue_reset = true;
5878 break;
5879 case 1:
5880 /* XXX compute */
5881 queue_reset = true;
5882 break;
5883 case 2:
5884 /* XXX compute */
5885 queue_reset = true;
5886 break;
5887 }
5888 break;
5889 }
5890 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005891 case 233: /* GUI IDLE */
5892 DRM_DEBUG("IH: GUI idle\n");
5893 break;
5894 default:
5895 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5896 break;
5897 }
5898
5899 /* wptr/rptr are in bytes! */
5900 rptr += 16;
5901 rptr &= rdev->ih.ptr_mask;
5902 }
5903 if (queue_hotplug)
5904 schedule_work(&rdev->hotplug_work);
5905 if (queue_reset)
5906 schedule_work(&rdev->reset_work);
5907 rdev->ih.rptr = rptr;
5908 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5909 atomic_set(&rdev->ih.lock, 0);
5910
5911 /* make sure wptr hasn't changed while processing */
5912 wptr = cik_get_ih_wptr(rdev);
5913 if (wptr != rptr)
5914 goto restart_ih;
5915
5916 return IRQ_HANDLED;
5917}
Alex Deucher7bf94a22012-08-17 11:48:29 -04005918
5919/*
5920 * startup/shutdown callbacks
5921 */
5922/**
5923 * cik_startup - program the asic to a functional state
5924 *
5925 * @rdev: radeon_device pointer
5926 *
5927 * Programs the asic to a functional state (CIK).
5928 * Called by cik_init() and cik_resume().
5929 * Returns 0 for success, error for failure.
5930 */
5931static int cik_startup(struct radeon_device *rdev)
5932{
5933 struct radeon_ring *ring;
5934 int r;
5935
5936 if (rdev->flags & RADEON_IS_IGP) {
5937 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5938 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5939 r = cik_init_microcode(rdev);
5940 if (r) {
5941 DRM_ERROR("Failed to load firmware!\n");
5942 return r;
5943 }
5944 }
5945 } else {
5946 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5947 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5948 !rdev->mc_fw) {
5949 r = cik_init_microcode(rdev);
5950 if (r) {
5951 DRM_ERROR("Failed to load firmware!\n");
5952 return r;
5953 }
5954 }
5955
5956 r = ci_mc_load_microcode(rdev);
5957 if (r) {
5958 DRM_ERROR("Failed to load MC firmware!\n");
5959 return r;
5960 }
5961 }
5962
5963 r = r600_vram_scratch_init(rdev);
5964 if (r)
5965 return r;
5966
5967 cik_mc_program(rdev);
5968 r = cik_pcie_gart_enable(rdev);
5969 if (r)
5970 return r;
5971 cik_gpu_init(rdev);
5972
5973 /* allocate rlc buffers */
5974 r = si_rlc_init(rdev);
5975 if (r) {
5976 DRM_ERROR("Failed to init rlc BOs!\n");
5977 return r;
5978 }
5979
5980 /* allocate wb buffer */
5981 r = radeon_wb_init(rdev);
5982 if (r)
5983 return r;
5984
Alex Deucher963e81f2013-06-26 17:37:11 -04005985 /* allocate mec buffers */
5986 r = cik_mec_init(rdev);
5987 if (r) {
5988 DRM_ERROR("Failed to init MEC BOs!\n");
5989 return r;
5990 }
5991
Alex Deucher7bf94a22012-08-17 11:48:29 -04005992 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5993 if (r) {
5994 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5995 return r;
5996 }
5997
Alex Deucher963e81f2013-06-26 17:37:11 -04005998 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5999 if (r) {
6000 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6001 return r;
6002 }
6003
6004 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6005 if (r) {
6006 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6007 return r;
6008 }
6009
Alex Deucher7bf94a22012-08-17 11:48:29 -04006010 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6011 if (r) {
6012 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6013 return r;
6014 }
6015
6016 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6017 if (r) {
6018 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6019 return r;
6020 }
6021
Christian König87167bb2013-04-09 13:39:21 -04006022 r = cik_uvd_resume(rdev);
6023 if (!r) {
6024 r = radeon_fence_driver_start_ring(rdev,
6025 R600_RING_TYPE_UVD_INDEX);
6026 if (r)
6027 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6028 }
6029 if (r)
6030 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6031
Alex Deucher7bf94a22012-08-17 11:48:29 -04006032 /* Enable IRQ */
6033 if (!rdev->irq.installed) {
6034 r = radeon_irq_kms_init(rdev);
6035 if (r)
6036 return r;
6037 }
6038
6039 r = cik_irq_init(rdev);
6040 if (r) {
6041 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6042 radeon_irq_kms_fini(rdev);
6043 return r;
6044 }
6045 cik_irq_set(rdev);
6046
6047 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6048 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6049 CP_RB0_RPTR, CP_RB0_WPTR,
6050 0, 0xfffff, RADEON_CP_PACKET2);
6051 if (r)
6052 return r;
6053
Alex Deucher963e81f2013-06-26 17:37:11 -04006054 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04006055 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006056 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6057 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6058 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006059 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006060 if (r)
6061 return r;
6062 ring->me = 1; /* first MEC */
6063 ring->pipe = 0; /* first pipe */
6064 ring->queue = 0; /* first queue */
6065 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6066
Alex Deucher2615b532013-06-03 11:21:58 -04006067 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006068 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6069 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6070 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006071 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006072 if (r)
6073 return r;
6074 	/* dGPU only has 1 MEC */
6075 ring->me = 1; /* first MEC */
6076 ring->pipe = 0; /* first pipe */
6077 ring->queue = 1; /* second queue */
6078 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6079
Alex Deucher7bf94a22012-08-17 11:48:29 -04006080 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6081 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6082 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6083 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6084 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6085 if (r)
6086 return r;
6087
6088 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6089 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6090 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6091 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6092 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6093 if (r)
6094 return r;
6095
6096 r = cik_cp_resume(rdev);
6097 if (r)
6098 return r;
6099
6100 r = cik_sdma_resume(rdev);
6101 if (r)
6102 return r;
6103
Christian König87167bb2013-04-09 13:39:21 -04006104 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6105 if (ring->ring_size) {
6106 r = radeon_ring_init(rdev, ring, ring->ring_size,
6107 R600_WB_UVD_RPTR_OFFSET,
6108 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6109 0, 0xfffff, RADEON_CP_PACKET2);
6110 if (!r)
6111 r = r600_uvd_init(rdev);
6112 if (r)
6113 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6114 }
6115
Alex Deucher7bf94a22012-08-17 11:48:29 -04006116 r = radeon_ib_pool_init(rdev);
6117 if (r) {
6118 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6119 return r;
6120 }
6121
6122 r = radeon_vm_manager_init(rdev);
6123 if (r) {
6124 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6125 return r;
6126 }
6127
6128 return 0;
6129}
6130
6131/**
6132 * cik_resume - resume the asic to a functional state
6133 *
6134 * @rdev: radeon_device pointer
6135 *
6136 * Programs the asic to a functional state (CIK).
6137 * Called at resume.
6138 * Returns 0 for success, error for failure.
6139 */
6140int cik_resume(struct radeon_device *rdev)
6141{
6142 int r;
6143
6144 /* post card */
6145 atom_asic_init(rdev->mode_info.atom_context);
6146
Alex Deucher0aafd312013-04-09 14:43:30 -04006147 /* init golden registers */
6148 cik_init_golden_registers(rdev);
6149
Alex Deucher7bf94a22012-08-17 11:48:29 -04006150 rdev->accel_working = true;
6151 r = cik_startup(rdev);
6152 if (r) {
6153 DRM_ERROR("cik startup failed on resume\n");
6154 rdev->accel_working = false;
6155 return r;
6156 }
6157
6158 return r;
6159
6160}
6161
6162/**
6163 * cik_suspend - suspend the asic
6164 *
6165 * @rdev: radeon_device pointer
6166 *
6167 * Bring the chip into a state suitable for suspend (CIK).
6168 * Called at suspend.
6169 * Returns 0 for success.
6170 */
6171int cik_suspend(struct radeon_device *rdev)
6172{
6173 radeon_vm_manager_fini(rdev);
6174 cik_cp_enable(rdev, false);
6175 cik_sdma_enable(rdev, false);
Christian König87167bb2013-04-09 13:39:21 -04006176 r600_uvd_rbc_stop(rdev);
6177 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006178 cik_irq_suspend(rdev);
6179 radeon_wb_disable(rdev);
6180 cik_pcie_gart_disable(rdev);
6181 return 0;
6182}
6183
6184/* The plan is to move initialization into this function and use
6185 * helper functions so that radeon_device_init does little more
6186 * than call asic specific functions. This should also allow us
6187 * to remove a bunch of callback functions,
6188 * like vram_info.
6189 */
6190/**
6191 * cik_init - asic specific driver and hw init
6192 *
6193 * @rdev: radeon_device pointer
6194 *
6195 * Setup asic specific driver variables and program the hw
6196 * to a functional state (CIK).
6197 * Called at driver startup.
6198 * Returns 0 for success, errors for failure.
6199 */
6200int cik_init(struct radeon_device *rdev)
6201{
6202 struct radeon_ring *ring;
6203 int r;
6204
6205 /* Read BIOS */
6206 if (!radeon_get_bios(rdev)) {
6207 if (ASIC_IS_AVIVO(rdev))
6208 return -EINVAL;
6209 }
6210 /* Must be an ATOMBIOS */
6211 if (!rdev->is_atom_bios) {
6212 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6213 return -EINVAL;
6214 }
6215 r = radeon_atombios_init(rdev);
6216 if (r)
6217 return r;
6218
6219 /* Post card if necessary */
6220 if (!radeon_card_posted(rdev)) {
6221 if (!rdev->bios) {
6222 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6223 return -EINVAL;
6224 }
6225 DRM_INFO("GPU not posted. posting now...\n");
6226 atom_asic_init(rdev->mode_info.atom_context);
6227 }
Alex Deucher0aafd312013-04-09 14:43:30 -04006228 /* init golden registers */
6229 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006230 /* Initialize scratch registers */
6231 cik_scratch_init(rdev);
6232 /* Initialize surface registers */
6233 radeon_surface_init(rdev);
6234 /* Initialize clocks */
6235 radeon_get_clock_info(rdev->ddev);
6236
6237 /* Fence driver */
6238 r = radeon_fence_driver_init(rdev);
6239 if (r)
6240 return r;
6241
6242 /* initialize memory controller */
6243 r = cik_mc_init(rdev);
6244 if (r)
6245 return r;
6246 /* Memory manager */
6247 r = radeon_bo_init(rdev);
6248 if (r)
6249 return r;
6250
6251 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6252 ring->ring_obj = NULL;
6253 r600_ring_init(rdev, ring, 1024 * 1024);
6254
Alex Deucher963e81f2013-06-26 17:37:11 -04006255 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6256 ring->ring_obj = NULL;
6257 r600_ring_init(rdev, ring, 1024 * 1024);
6258 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6259 if (r)
6260 return r;
6261
6262 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6263 ring->ring_obj = NULL;
6264 r600_ring_init(rdev, ring, 1024 * 1024);
6265 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6266 if (r)
6267 return r;
6268
Alex Deucher7bf94a22012-08-17 11:48:29 -04006269 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6270 ring->ring_obj = NULL;
6271 r600_ring_init(rdev, ring, 256 * 1024);
6272
6273 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6274 ring->ring_obj = NULL;
6275 r600_ring_init(rdev, ring, 256 * 1024);
6276
Christian König87167bb2013-04-09 13:39:21 -04006277 r = radeon_uvd_init(rdev);
6278 if (!r) {
6279 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6280 ring->ring_obj = NULL;
6281 r600_ring_init(rdev, ring, 4096);
6282 }
6283
Alex Deucher7bf94a22012-08-17 11:48:29 -04006284 rdev->ih.ring_obj = NULL;
6285 r600_ih_ring_init(rdev, 64 * 1024);
6286
6287 r = r600_pcie_gart_init(rdev);
6288 if (r)
6289 return r;
6290
6291 rdev->accel_working = true;
6292 r = cik_startup(rdev);
6293 if (r) {
6294 dev_err(rdev->dev, "disabling GPU acceleration\n");
6295 cik_cp_fini(rdev);
6296 cik_sdma_fini(rdev);
6297 cik_irq_fini(rdev);
6298 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006299 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006300 radeon_wb_fini(rdev);
6301 radeon_ib_pool_fini(rdev);
6302 radeon_vm_manager_fini(rdev);
6303 radeon_irq_kms_fini(rdev);
6304 cik_pcie_gart_fini(rdev);
6305 rdev->accel_working = false;
6306 }
6307
6308 /* Don't start up if the MC ucode is missing.
6309 * The default clocks and voltages before the MC ucode
6310 	 * is loaded are not sufficient for advanced operations.
6311 */
6312 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6313 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6314 return -EINVAL;
6315 }
6316
6317 return 0;
6318}
6319
6320/**
6321 * cik_fini - asic specific driver and hw fini
6322 *
6323 * @rdev: radeon_device pointer
6324 *
6325 * Tear down the asic specific driver variables and program the hw
6326 * to an idle state (CIK).
6327 * Called at driver unload.
6328 */
6329void cik_fini(struct radeon_device *rdev)
6330{
6331 cik_cp_fini(rdev);
6332 cik_sdma_fini(rdev);
6333 cik_irq_fini(rdev);
6334 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006335 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006336 radeon_wb_fini(rdev);
6337 radeon_vm_manager_fini(rdev);
6338 radeon_ib_pool_fini(rdev);
6339 radeon_irq_kms_fini(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006340 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006341 cik_pcie_gart_fini(rdev);
6342 r600_vram_scratch_fini(rdev);
6343 radeon_gem_fini(rdev);
6344 radeon_fence_driver_fini(rdev);
6345 radeon_bo_fini(rdev);
6346 radeon_atombios_fini(rdev);
6347 kfree(rdev->bios);
6348 rdev->bios = NULL;
6349}
Alex Deuchercd84a272012-07-20 17:13:13 -04006350
6351/* display watermark setup */
6352/**
6353 * dce8_line_buffer_adjust - Set up the line buffer
6354 *
6355 * @rdev: radeon_device pointer
6356 * @radeon_crtc: the selected display controller
6357 * @mode: the current display mode on the selected display
6358 * controller
6359 *
6360 * Setup up the line buffer allocation for
6361 * the selected display controller (CIK).
6362 * Returns the line buffer size in pixels.
6363 */
6364static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6365 struct radeon_crtc *radeon_crtc,
6366 struct drm_display_mode *mode)
6367{
6368 u32 tmp;
6369
6370 /*
6371 * Line Buffer Setup
6372 	 * There are 6 line buffers, one for each display controller.
6373 * There are 3 partitions per LB. Select the number of partitions
6374 * to enable based on the display width. For display widths larger
6375 	 * than 4096, you need to use 2 display controllers and combine
6376 * them using the stereo blender.
6377 */
6378 if (radeon_crtc->base.enabled && mode) {
6379 if (mode->crtc_hdisplay < 1920)
6380 tmp = 1;
6381 else if (mode->crtc_hdisplay < 2560)
6382 tmp = 2;
6383 else if (mode->crtc_hdisplay < 4096)
6384 tmp = 0;
6385 else {
6386 DRM_DEBUG_KMS("Mode too big for LB!\n");
6387 tmp = 0;
6388 }
6389 } else
6390 tmp = 1;
6391
6392 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6393 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6394
6395 if (radeon_crtc->base.enabled && mode) {
6396 switch (tmp) {
6397 case 0:
6398 default:
6399 return 4096 * 2;
6400 case 1:
6401 return 1920 * 2;
6402 case 2:
6403 return 2560 * 2;
6404 }
6405 }
6406
6407 /* controller not enabled, so no lb used */
6408 return 0;
6409}
6410
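/*
 * Worked example for the function above (illustrative only): a 1920 pixel
 * wide mode selects tmp = 2 and reports a 2560 * 2 pixel allocation, while
 * a 2560 pixel wide mode falls through to tmp = 0 (all partitions) and
 * reports 4096 * 2 pixels.
 */
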
6411/**
6412 * cik_get_number_of_dram_channels - get the number of dram channels
6413 *
6414 * @rdev: radeon_device pointer
6415 *
6416 * Look up the number of video ram channels (CIK).
6417 * Used for display watermark bandwidth calculations
6418 * Returns the number of dram channels
6419 */
6420static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6421{
6422 u32 tmp = RREG32(MC_SHARED_CHMAP);
6423
6424 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6425 case 0:
6426 default:
6427 return 1;
6428 case 1:
6429 return 2;
6430 case 2:
6431 return 4;
6432 case 3:
6433 return 8;
6434 case 4:
6435 return 3;
6436 case 5:
6437 return 6;
6438 case 6:
6439 return 10;
6440 case 7:
6441 return 12;
6442 case 8:
6443 return 16;
6444 }
6445}
6446
6447struct dce8_wm_params {
6448 u32 dram_channels; /* number of dram channels */
6449 u32 yclk; /* bandwidth per dram data pin in kHz */
6450 u32 sclk; /* engine clock in kHz */
6451 u32 disp_clk; /* display clock in kHz */
6452 u32 src_width; /* viewport width */
6453 u32 active_time; /* active display time in ns */
6454 u32 blank_time; /* blank time in ns */
6455 bool interlaced; /* mode is interlaced */
6456 fixed20_12 vsc; /* vertical scale ratio */
6457 u32 num_heads; /* number of active crtcs */
6458 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6459 u32 lb_size; /* line buffer allocated to pipe */
6460 u32 vtaps; /* vertical scaler taps */
6461};
6462
6463/**
6464 * dce8_dram_bandwidth - get the dram bandwidth
6465 *
6466 * @wm: watermark calculation data
6467 *
6468 * Calculate the raw dram bandwidth (CIK).
6469 * Used for display watermark bandwidth calculations
6470 * Returns the dram bandwidth in MBytes/s
6471 */
6472static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6473{
6474 /* Calculate raw DRAM Bandwidth */
6475 fixed20_12 dram_efficiency; /* 0.7 */
6476 fixed20_12 yclk, dram_channels, bandwidth;
6477 fixed20_12 a;
6478
6479 a.full = dfixed_const(1000);
6480 yclk.full = dfixed_const(wm->yclk);
6481 yclk.full = dfixed_div(yclk, a);
6482 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6483 a.full = dfixed_const(10);
6484 dram_efficiency.full = dfixed_const(7);
6485 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6486 bandwidth.full = dfixed_mul(dram_channels, yclk);
6487 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6488
6489 return dfixed_trunc(bandwidth);
6490}
6491
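/*
 * In plain terms the fixed-point math above computes (illustrative only,
 * example values assumed):
 *	raw_bw [MB/s] ~= (yclk / 1000) * (dram_channels * 4) * 0.7
 * e.g. yclk = 500000 kHz and 2 channels -> 500 * 8 * 0.7 = 2800 MB/s.
 */
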
6492/**
6493 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6494 *
6495 * @wm: watermark calculation data
6496 *
6497 * Calculate the dram bandwidth used for display (CIK).
6498 * Used for display watermark bandwidth calculations
6499 * Returns the dram bandwidth for display in MBytes/s
6500 */
6501static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6502{
6503 /* Calculate DRAM Bandwidth and the part allocated to display. */
6504 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6505 fixed20_12 yclk, dram_channels, bandwidth;
6506 fixed20_12 a;
6507
6508 a.full = dfixed_const(1000);
6509 yclk.full = dfixed_const(wm->yclk);
6510 yclk.full = dfixed_div(yclk, a);
6511 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6512 a.full = dfixed_const(10);
6513 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6514 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6515 bandwidth.full = dfixed_mul(dram_channels, yclk);
6516 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6517
6518 return dfixed_trunc(bandwidth);
6519}
6520
6521/**
6522 * dce8_data_return_bandwidth - get the data return bandwidth
6523 *
6524 * @wm: watermark calculation data
6525 *
6526 * Calculate the data return bandwidth used for display (CIK).
6527 * Used for display watermark bandwidth calculations
6528 * Returns the data return bandwidth in MBytes/s
6529 */
6530static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6531{
6532 /* Calculate the display Data return Bandwidth */
6533 fixed20_12 return_efficiency; /* 0.8 */
6534 fixed20_12 sclk, bandwidth;
6535 fixed20_12 a;
6536
6537 a.full = dfixed_const(1000);
6538 sclk.full = dfixed_const(wm->sclk);
6539 sclk.full = dfixed_div(sclk, a);
6540 a.full = dfixed_const(10);
6541 return_efficiency.full = dfixed_const(8);
6542 return_efficiency.full = dfixed_div(return_efficiency, a);
6543 a.full = dfixed_const(32);
6544 bandwidth.full = dfixed_mul(a, sclk);
6545 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6546
6547 return dfixed_trunc(bandwidth);
6548}
6549
6550/**
6551 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6552 *
6553 * @wm: watermark calculation data
6554 *
6555 * Calculate the dmif bandwidth used for display (CIK).
6556 * Used for display watermark bandwidth calculations
6557 * Returns the dmif bandwidth in MBytes/s
6558 */
6559static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6560{
6561 /* Calculate the DMIF Request Bandwidth */
6562 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6563 fixed20_12 disp_clk, bandwidth;
6564 fixed20_12 a, b;
6565
6566 a.full = dfixed_const(1000);
6567 disp_clk.full = dfixed_const(wm->disp_clk);
6568 disp_clk.full = dfixed_div(disp_clk, a);
6569 a.full = dfixed_const(32);
6570 b.full = dfixed_mul(a, disp_clk);
6571
6572 a.full = dfixed_const(10);
6573 disp_clk_request_efficiency.full = dfixed_const(8);
6574 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6575
6576 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6577
6578 return dfixed_trunc(bandwidth);
6579}
6580
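/*
 * In plain terms the fixed-point math above computes (illustrative only,
 * example value assumed):
 *	dmif_bw [MB/s] ~= (disp_clk / 1000) * 32 * 0.8
 * e.g. disp_clk = 300000 kHz -> 300 * 32 * 0.8 = 7680 MB/s.
 */
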
6581/**
6582 * dce8_available_bandwidth - get the min available bandwidth
6583 *
6584 * @wm: watermark calculation data
6585 *
6586 * Calculate the min available bandwidth used for display (CIK).
6587 * Used for display watermark bandwidth calculations
6588 * Returns the min available bandwidth in MBytes/s
6589 */
6590static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6591{
6592 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6593 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6594 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6595 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6596
6597 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6598}
6599
6600/**
6601 * dce8_average_bandwidth - get the average available bandwidth
6602 *
6603 * @wm: watermark calculation data
6604 *
6605 * Calculate the average available bandwidth used for display (CIK).
6606 * Used for display watermark bandwidth calculations
6607 * Returns the average available bandwidth in MBytes/s
6608 */
6609static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6610{
6611 /* Calculate the display mode Average Bandwidth
6612 * DisplayMode should contain the source and destination dimensions,
6613 * timing, etc.
6614 */
6615 fixed20_12 bpp;
6616 fixed20_12 line_time;
6617 fixed20_12 src_width;
6618 fixed20_12 bandwidth;
6619 fixed20_12 a;
6620
6621 a.full = dfixed_const(1000);
6622 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6623 line_time.full = dfixed_div(line_time, a);
6624 bpp.full = dfixed_const(wm->bytes_per_pixel);
6625 src_width.full = dfixed_const(wm->src_width);
6626 bandwidth.full = dfixed_mul(src_width, bpp);
6627 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6628 bandwidth.full = dfixed_div(bandwidth, line_time);
6629
6630 return dfixed_trunc(bandwidth);
6631}
6632
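/*
 * In plain terms the fixed-point math above computes (illustrative only,
 * example values assumed):
 *	avg_bw [MB/s] ~= src_width * bytes_per_pixel * vsc / (line_time / 1000)
 * e.g. a 1920 wide, 4 byte per pixel source with vsc = 1 and a ~14800 ns
 * line time needs roughly 1920 * 4 / 14.8 ~= 519 MB/s.
 */
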
6633/**
6634 * dce8_latency_watermark - get the latency watermark
6635 *
6636 * @wm: watermark calculation data
6637 *
6638 * Calculate the latency watermark (CIK).
6639 * Used for display watermark bandwidth calculations
6640 * Returns the latency watermark in ns
6641 */
6642static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6643{
6644 /* First calculate the latency in ns */
6645 u32 mc_latency = 2000; /* 2000 ns. */
6646 u32 available_bandwidth = dce8_available_bandwidth(wm);
6647 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6648 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6649 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6650 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6651 (wm->num_heads * cursor_line_pair_return_time);
6652 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6653 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6654 u32 tmp, dmif_size = 12288;
6655 fixed20_12 a, b, c;
6656
6657 if (wm->num_heads == 0)
6658 return 0;
6659
6660 a.full = dfixed_const(2);
6661 b.full = dfixed_const(1);
6662 if ((wm->vsc.full > a.full) ||
6663 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6664 (wm->vtaps >= 5) ||
6665 ((wm->vsc.full >= a.full) && wm->interlaced))
6666 max_src_lines_per_dst_line = 4;
6667 else
6668 max_src_lines_per_dst_line = 2;
6669
6670 a.full = dfixed_const(available_bandwidth);
6671 b.full = dfixed_const(wm->num_heads);
6672 a.full = dfixed_div(a, b);
6673
6674 b.full = dfixed_const(mc_latency + 512);
6675 c.full = dfixed_const(wm->disp_clk);
6676 b.full = dfixed_div(b, c);
6677
6678 c.full = dfixed_const(dmif_size);
6679 b.full = dfixed_div(c, b);
6680
6681 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6682
6683 b.full = dfixed_const(1000);
6684 c.full = dfixed_const(wm->disp_clk);
6685 b.full = dfixed_div(c, b);
6686 c.full = dfixed_const(wm->bytes_per_pixel);
6687 b.full = dfixed_mul(b, c);
6688
6689 lb_fill_bw = min(tmp, dfixed_trunc(b));
6690
6691 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6692 b.full = dfixed_const(1000);
6693 c.full = dfixed_const(lb_fill_bw);
6694 b.full = dfixed_div(c, b);
6695 a.full = dfixed_div(a, b);
6696 line_fill_time = dfixed_trunc(a);
6697
6698 if (line_fill_time < wm->active_time)
6699 return latency;
6700 else
6701 return latency + (line_fill_time - wm->active_time);
6702
6703}
6704
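/*
 * Summary of the calculation above (illustrative only): the base latency
 * is mc_latency + dc_latency plus the time the other heads need to return
 * their worst-case chunk and cursor line-pair data; if the line buffer
 * cannot be refilled at lb_fill_bw within the active display time, the
 * shortfall is added to the reported watermark.
 */
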
6705/**
6706 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6707 * average and available dram bandwidth
6708 *
6709 * @wm: watermark calculation data
6710 *
6711 * Check if the display average bandwidth fits in the display
6712 * dram bandwidth (CIK).
6713 * Used for display watermark bandwidth calculations
6714 * Returns true if the display fits, false if not.
6715 */
6716static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6717{
6718 if (dce8_average_bandwidth(wm) <=
6719 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6720 return true;
6721 else
6722 return false;
6723}
6724
6725/**
6726 * dce8_average_bandwidth_vs_available_bandwidth - check
6727 * average and available bandwidth
6728 *
6729 * @wm: watermark calculation data
6730 *
6731 * Check if the display average bandwidth fits in the display
6732 * available bandwidth (CIK).
6733 * Used for display watermark bandwidth calculations
6734 * Returns true if the display fits, false if not.
6735 */
6736static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6737{
6738 if (dce8_average_bandwidth(wm) <=
6739 (dce8_available_bandwidth(wm) / wm->num_heads))
6740 return true;
6741 else
6742 return false;
6743}
6744
6745/**
6746 * dce8_check_latency_hiding - check latency hiding
6747 *
6748 * @wm: watermark calculation data
6749 *
6750 * Check latency hiding (CIK).
6751 * Used for display watermark bandwidth calculations
6752 * Returns true if the display fits, false if not.
6753 */
6754static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6755{
6756 u32 lb_partitions = wm->lb_size / wm->src_width;
6757 u32 line_time = wm->active_time + wm->blank_time;
6758 u32 latency_tolerant_lines;
6759 u32 latency_hiding;
6760 fixed20_12 a;
6761
6762 a.full = dfixed_const(1);
6763 if (wm->vsc.full > a.full)
6764 latency_tolerant_lines = 1;
6765 else {
6766 if (lb_partitions <= (wm->vtaps + 1))
6767 latency_tolerant_lines = 1;
6768 else
6769 latency_tolerant_lines = 2;
6770 }
6771
6772 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6773
6774 if (dce8_latency_watermark(wm) <= latency_hiding)
6775 return true;
6776 else
6777 return false;
6778}
6779
6780/**
6781 * dce8_program_watermarks - program display watermarks
6782 *
6783 * @rdev: radeon_device pointer
6784 * @radeon_crtc: the selected display controller
6785 * @lb_size: line buffer size
6786 * @num_heads: number of display controllers in use
6787 *
6788 * Calculate and program the display watermarks for the
6789 * selected display controller (CIK).
6790 */
6791static void dce8_program_watermarks(struct radeon_device *rdev,
6792 struct radeon_crtc *radeon_crtc,
6793 u32 lb_size, u32 num_heads)
6794{
6795 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6796 struct dce8_wm_params wm;
6797 u32 pixel_period;
6798 u32 line_time = 0;
6799 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6800 u32 tmp, wm_mask;
6801
6802 if (radeon_crtc->base.enabled && num_heads && mode) {
6803 pixel_period = 1000000 / (u32)mode->clock;
6804 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6805
6806 wm.yclk = rdev->pm.current_mclk * 10;
6807 wm.sclk = rdev->pm.current_sclk * 10;
6808 wm.disp_clk = mode->clock;
6809 wm.src_width = mode->crtc_hdisplay;
6810 wm.active_time = mode->crtc_hdisplay * pixel_period;
6811 wm.blank_time = line_time - wm.active_time;
6812 wm.interlaced = false;
6813 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6814 wm.interlaced = true;
6815 wm.vsc = radeon_crtc->vsc;
6816 wm.vtaps = 1;
6817 if (radeon_crtc->rmx_type != RMX_OFF)
6818 wm.vtaps = 2;
6819 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6820 wm.lb_size = lb_size;
6821 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6822 wm.num_heads = num_heads;
6823
6824 /* set for high clocks */
6825 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6826 /* set for low clocks */
6827 /* wm.yclk = low clk; wm.sclk = low clk */
6828 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6829
6830 /* possibly force display priority to high */
6831 /* should really do this at mode validation time... */
6832 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6833 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6834 !dce8_check_latency_hiding(&wm) ||
6835 (rdev->disp_priority == 2)) {
6836 DRM_DEBUG_KMS("force priority to high\n");
6837 }
6838 }
6839
6840 /* select wm A */
6841 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6842 tmp = wm_mask;
6843 tmp &= ~LATENCY_WATERMARK_MASK(3);
6844 tmp |= LATENCY_WATERMARK_MASK(1);
6845 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6846 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6847 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6848 LATENCY_HIGH_WATERMARK(line_time)));
6849 /* select wm B */
6850 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6851 tmp &= ~LATENCY_WATERMARK_MASK(3);
6852 tmp |= LATENCY_WATERMARK_MASK(2);
6853 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6854 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6855 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6856 LATENCY_HIGH_WATERMARK(line_time)));
6857 /* restore original selection */
6858 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6859}
6860
6861/**
6862 * dce8_bandwidth_update - program display watermarks
6863 *
6864 * @rdev: radeon_device pointer
6865 *
6866 * Calculate and program the display watermarks and line
6867 * buffer allocation (CIK).
6868 */
6869void dce8_bandwidth_update(struct radeon_device *rdev)
6870{
6871 struct drm_display_mode *mode = NULL;
6872 u32 num_heads = 0, lb_size;
6873 int i;
6874
6875 radeon_update_display_priority(rdev);
6876
6877 for (i = 0; i < rdev->num_crtc; i++) {
6878 if (rdev->mode_info.crtcs[i]->base.enabled)
6879 num_heads++;
6880 }
6881 for (i = 0; i < rdev->num_crtc; i++) {
6882 mode = &rdev->mode_info.crtcs[i]->base.mode;
6883 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6884 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6885 }
6886}
6887
6888/**
6889 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6890 *
6891 * @rdev: radeon_device pointer
6892 *
6893 * Fetches a GPU clock counter snapshot (CIK).
6894 *
6895 * Returns the 64-bit clock counter snapshot.
6896uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6897{
6898 uint64_t clock;
6899
6900 mutex_lock(&rdev->gpu_clock_mutex);
6901 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6902 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6903 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6904 mutex_unlock(&rdev->gpu_clock_mutex);
6905 return clock;
6906}
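
/*
 * Illustrative sketch only, not part of the original driver: a hypothetical
 * helper showing how a caller might use two snapshots to measure an interval
 * in GPU clock ticks.  The function name below is an assumption made for
 * this example.
 */
static u64 cik_gpu_clock_ticks_example(struct radeon_device *rdev)
{
	uint64_t start, end;

	start = cik_get_gpu_clock_counter(rdev);
	mdelay(1);	/* a real caller would run some workload here */
	end = cik_get_gpu_clock_counter(rdev);

	return end - start;	/* elapsed GPU clock ticks */
}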
6907
6908static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6909 u32 cntl_reg, u32 status_reg)
6910{
6911 int r, i;
6912 struct atom_clock_dividers dividers;
6913 uint32_t tmp;
6914
6915 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6916 clock, false, &dividers);
6917 if (r)
6918 return r;
6919
6920 tmp = RREG32_SMC(cntl_reg);
6921 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6922 tmp |= dividers.post_divider;
6923 WREG32_SMC(cntl_reg, tmp);
6924
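	/* Wait for the new divider to take effect: poll DCLK_STATUS for up
	 * to ~1 second (100 iterations of 10 ms) before giving up.
	 */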
6925 for (i = 0; i < 100; i++) {
6926 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6927 break;
6928 mdelay(10);
6929 }
6930 if (i == 100)
6931 return -ETIMEDOUT;
6932
6933 return 0;
6934}
6935
6936int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6937{
6938 int r = 0;
6939
6940 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6941 if (r)
6942 return r;
6943
6944 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6945 return r;
6946}
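
/*
 * Illustrative usage only (the clock values are hypothetical, not taken
 * from this file): callers program both UVD clocks in one call, e.g.
 *
 *	r = cik_set_uvd_clocks(rdev, 53300, 40000);
 *	if (r)
 *		DRM_ERROR("failed to set UVD clocks: %d\n", r);
 *
 * and bail out if either divider never latches.
 */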
6947
6948int cik_uvd_resume(struct radeon_device *rdev)
6949{
6950 uint64_t addr;
6951 uint32_t size;
6952 int r;
6953
6954 r = radeon_uvd_resume(rdev);
6955 if (r)
6956 return r;
6957
6958	/* program the VCPU memory controller bits 0-27 */
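	/*
	 * Layout sketch (all offsets and sizes below are in 8-byte units,
	 * hence the ">> 3"): the firmware image comes first, followed by
	 * the VCPU stack and then the heap, each exposed through its own
	 * UVD_VCPU_CACHE_OFFSETn/SIZEn window.
	 */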
6959 addr = rdev->uvd.gpu_addr >> 3;
6960 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6961 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6962 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6963
6964 addr += size;
6965 size = RADEON_UVD_STACK_SIZE >> 3;
6966 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6967 WREG32(UVD_VCPU_CACHE_SIZE1, size);
6968
6969 addr += size;
6970 size = RADEON_UVD_HEAP_SIZE >> 3;
6971 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6972 WREG32(UVD_VCPU_CACHE_SIZE2, size);
6973
6974 /* bits 28-31 */
6975 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6976 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6977
6978 /* bits 32-39 */
6979 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
6980 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
6981
6982 return 0;
6983}