/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

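/*
 * The "golden" register tables below are consumed by
 * radeon_program_register_sequence(); each entry is assumed to be a
 * {register offset, mask of bits to update, value} triplet, with a mask
 * of 0xffffffff meaning the value is written as-is.
 */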
87static const u32 bonaire_golden_spm_registers[] =
88{
89 0x30800, 0xe0ffffff, 0xe0000000
90};
91
92static const u32 bonaire_golden_common_registers[] =
93{
94 0xc770, 0xffffffff, 0x00000800,
95 0xc774, 0xffffffff, 0x00000800,
96 0xc798, 0xffffffff, 0x00007fbf,
97 0xc79c, 0xffffffff, 0x00007faf
98};
99
100static const u32 bonaire_golden_registers[] =
101{
102 0x3354, 0x00000333, 0x00000333,
103 0x3350, 0x000c0fc0, 0x00040200,
104 0x9a10, 0x00010000, 0x00058208,
105 0x3c000, 0xffff1fff, 0x00140000,
106 0x3c200, 0xfdfc0fff, 0x00000100,
107 0x3c234, 0x40000000, 0x40000200,
108 0x9830, 0xffffffff, 0x00000000,
109 0x9834, 0xf00fffff, 0x00000400,
110 0x9838, 0x0002021c, 0x00020200,
111 0xc78, 0x00000080, 0x00000000,
112 0x5bb0, 0x000000f0, 0x00000070,
113 0x5bc0, 0xf0311fff, 0x80300000,
114 0x98f8, 0x73773777, 0x12010001,
115 0x350c, 0x00810000, 0x408af000,
116 0x7030, 0x31000111, 0x00000011,
117 0x2f48, 0x73773777, 0x12010001,
118 0x220c, 0x00007fb6, 0x0021a1b1,
119 0x2210, 0x00007fb6, 0x002021b1,
120 0x2180, 0x00007fb6, 0x00002191,
121 0x2218, 0x00007fb6, 0x002121b1,
122 0x221c, 0x00007fb6, 0x002021b1,
123 0x21dc, 0x00007fb6, 0x00002191,
124 0x21e0, 0x00007fb6, 0x00002191,
125 0x3628, 0x0000003f, 0x0000000a,
126 0x362c, 0x0000003f, 0x0000000a,
127 0x2ae4, 0x00073ffe, 0x000022a2,
128 0x240c, 0x000007ff, 0x00000000,
129 0x8a14, 0xf000003f, 0x00000007,
130 0x8bf0, 0x00002001, 0x00000001,
131 0x8b24, 0xffffffff, 0x00ffffff,
132 0x30a04, 0x0000ff0f, 0x00000000,
133 0x28a4c, 0x07ffffff, 0x06000000,
134 0x4d8, 0x00000fff, 0x00000100,
135 0x3e78, 0x00000001, 0x00000002,
136 0x9100, 0x03000000, 0x0362c688,
137 0x8c00, 0x000000ff, 0x00000001,
138 0xe40, 0x00001fff, 0x00001fff,
139 0x9060, 0x0000007f, 0x00000020,
140 0x9508, 0x00010000, 0x00010000,
141 0xac14, 0x000003ff, 0x000000f3,
142 0xac0c, 0xffffffff, 0x00001032
143};
144
145static const u32 bonaire_mgcg_cgcg_init[] =
146{
147 0xc420, 0xffffffff, 0xfffffffc,
148 0x30800, 0xffffffff, 0xe0000000,
149 0x3c2a0, 0xffffffff, 0x00000100,
150 0x3c208, 0xffffffff, 0x00000100,
151 0x3c2c0, 0xffffffff, 0xc0000100,
152 0x3c2c8, 0xffffffff, 0xc0000100,
153 0x3c2c4, 0xffffffff, 0xc0000100,
154 0x55e4, 0xffffffff, 0x00600100,
155 0x3c280, 0xffffffff, 0x00000100,
156 0x3c214, 0xffffffff, 0x06000100,
157 0x3c220, 0xffffffff, 0x00000100,
158 0x3c218, 0xffffffff, 0x06000100,
159 0x3c204, 0xffffffff, 0x00000100,
160 0x3c2e0, 0xffffffff, 0x00000100,
161 0x3c224, 0xffffffff, 0x00000100,
162 0x3c200, 0xffffffff, 0x00000100,
163 0x3c230, 0xffffffff, 0x00000100,
164 0x3c234, 0xffffffff, 0x00000100,
165 0x3c250, 0xffffffff, 0x00000100,
166 0x3c254, 0xffffffff, 0x00000100,
167 0x3c258, 0xffffffff, 0x00000100,
168 0x3c25c, 0xffffffff, 0x00000100,
169 0x3c260, 0xffffffff, 0x00000100,
170 0x3c27c, 0xffffffff, 0x00000100,
171 0x3c278, 0xffffffff, 0x00000100,
172 0x3c210, 0xffffffff, 0x06000100,
173 0x3c290, 0xffffffff, 0x00000100,
174 0x3c274, 0xffffffff, 0x00000100,
175 0x3c2b4, 0xffffffff, 0x00000100,
176 0x3c2b0, 0xffffffff, 0x00000100,
177 0x3c270, 0xffffffff, 0x00000100,
178 0x30800, 0xffffffff, 0xe0000000,
179 0x3c020, 0xffffffff, 0x00010000,
180 0x3c024, 0xffffffff, 0x00030002,
181 0x3c028, 0xffffffff, 0x00040007,
182 0x3c02c, 0xffffffff, 0x00060005,
183 0x3c030, 0xffffffff, 0x00090008,
184 0x3c034, 0xffffffff, 0x00010000,
185 0x3c038, 0xffffffff, 0x00030002,
186 0x3c03c, 0xffffffff, 0x00040007,
187 0x3c040, 0xffffffff, 0x00060005,
188 0x3c044, 0xffffffff, 0x00090008,
189 0x3c048, 0xffffffff, 0x00010000,
190 0x3c04c, 0xffffffff, 0x00030002,
191 0x3c050, 0xffffffff, 0x00040007,
192 0x3c054, 0xffffffff, 0x00060005,
193 0x3c058, 0xffffffff, 0x00090008,
194 0x3c05c, 0xffffffff, 0x00010000,
195 0x3c060, 0xffffffff, 0x00030002,
196 0x3c064, 0xffffffff, 0x00040007,
197 0x3c068, 0xffffffff, 0x00060005,
198 0x3c06c, 0xffffffff, 0x00090008,
199 0x3c070, 0xffffffff, 0x00010000,
200 0x3c074, 0xffffffff, 0x00030002,
201 0x3c078, 0xffffffff, 0x00040007,
202 0x3c07c, 0xffffffff, 0x00060005,
203 0x3c080, 0xffffffff, 0x00090008,
204 0x3c084, 0xffffffff, 0x00010000,
205 0x3c088, 0xffffffff, 0x00030002,
206 0x3c08c, 0xffffffff, 0x00040007,
207 0x3c090, 0xffffffff, 0x00060005,
208 0x3c094, 0xffffffff, 0x00090008,
209 0x3c098, 0xffffffff, 0x00010000,
210 0x3c09c, 0xffffffff, 0x00030002,
211 0x3c0a0, 0xffffffff, 0x00040007,
212 0x3c0a4, 0xffffffff, 0x00060005,
213 0x3c0a8, 0xffffffff, 0x00090008,
214 0x3c000, 0xffffffff, 0x96e00200,
215 0x8708, 0xffffffff, 0x00900100,
216 0xc424, 0xffffffff, 0x0020003f,
217 0x38, 0xffffffff, 0x0140001c,
218 0x3c, 0x000f0000, 0x000f0000,
219 0x220, 0xffffffff, 0xC060000C,
220 0x224, 0xc0000fff, 0x00000100,
221 0xf90, 0xffffffff, 0x00000100,
222 0xf98, 0x00000101, 0x00000000,
223 0x20a8, 0xffffffff, 0x00000104,
224 0x55e4, 0xff000fff, 0x00000100,
225 0x30cc, 0xc0000fff, 0x00000104,
226 0xc1e4, 0x00000001, 0x00000001,
227 0xd00c, 0xff000ff0, 0x00000100,
228 0xd80c, 0xff000ff0, 0x00000100
229};
230
231static const u32 spectre_golden_spm_registers[] =
232{
233 0x30800, 0xe0ffffff, 0xe0000000
234};
235
236static const u32 spectre_golden_common_registers[] =
237{
238 0xc770, 0xffffffff, 0x00000800,
239 0xc774, 0xffffffff, 0x00000800,
240 0xc798, 0xffffffff, 0x00007fbf,
241 0xc79c, 0xffffffff, 0x00007faf
242};
243
244static const u32 spectre_golden_registers[] =
245{
246 0x3c000, 0xffff1fff, 0x96940200,
247 0x3c00c, 0xffff0001, 0xff000000,
248 0x3c200, 0xfffc0fff, 0x00000100,
249 0x6ed8, 0x00010101, 0x00010000,
250 0x9834, 0xf00fffff, 0x00000400,
251 0x9838, 0xfffffffc, 0x00020200,
252 0x5bb0, 0x000000f0, 0x00000070,
253 0x5bc0, 0xf0311fff, 0x80300000,
254 0x98f8, 0x73773777, 0x12010001,
255 0x9b7c, 0x00ff0000, 0x00fc0000,
256 0x2f48, 0x73773777, 0x12010001,
257 0x8a14, 0xf000003f, 0x00000007,
258 0x8b24, 0xffffffff, 0x00ffffff,
259 0x28350, 0x3f3f3fff, 0x00000082,
260 0x28355, 0x0000003f, 0x00000000,
261 0x3e78, 0x00000001, 0x00000002,
262 0x913c, 0xffff03df, 0x00000004,
263 0xc768, 0x00000008, 0x00000008,
264 0x8c00, 0x000008ff, 0x00000800,
265 0x9508, 0x00010000, 0x00010000,
266 0xac0c, 0xffffffff, 0x54763210,
267 0x214f8, 0x01ff01ff, 0x00000002,
268 0x21498, 0x007ff800, 0x00200000,
269 0x2015c, 0xffffffff, 0x00000f40,
270 0x30934, 0xffffffff, 0x00000001
271};
272
273static const u32 spectre_mgcg_cgcg_init[] =
274{
275 0xc420, 0xffffffff, 0xfffffffc,
276 0x30800, 0xffffffff, 0xe0000000,
277 0x3c2a0, 0xffffffff, 0x00000100,
278 0x3c208, 0xffffffff, 0x00000100,
279 0x3c2c0, 0xffffffff, 0x00000100,
280 0x3c2c8, 0xffffffff, 0x00000100,
281 0x3c2c4, 0xffffffff, 0x00000100,
282 0x55e4, 0xffffffff, 0x00600100,
283 0x3c280, 0xffffffff, 0x00000100,
284 0x3c214, 0xffffffff, 0x06000100,
285 0x3c220, 0xffffffff, 0x00000100,
286 0x3c218, 0xffffffff, 0x06000100,
287 0x3c204, 0xffffffff, 0x00000100,
288 0x3c2e0, 0xffffffff, 0x00000100,
289 0x3c224, 0xffffffff, 0x00000100,
290 0x3c200, 0xffffffff, 0x00000100,
291 0x3c230, 0xffffffff, 0x00000100,
292 0x3c234, 0xffffffff, 0x00000100,
293 0x3c250, 0xffffffff, 0x00000100,
294 0x3c254, 0xffffffff, 0x00000100,
295 0x3c258, 0xffffffff, 0x00000100,
296 0x3c25c, 0xffffffff, 0x00000100,
297 0x3c260, 0xffffffff, 0x00000100,
298 0x3c27c, 0xffffffff, 0x00000100,
299 0x3c278, 0xffffffff, 0x00000100,
300 0x3c210, 0xffffffff, 0x06000100,
301 0x3c290, 0xffffffff, 0x00000100,
302 0x3c274, 0xffffffff, 0x00000100,
303 0x3c2b4, 0xffffffff, 0x00000100,
304 0x3c2b0, 0xffffffff, 0x00000100,
305 0x3c270, 0xffffffff, 0x00000100,
306 0x30800, 0xffffffff, 0xe0000000,
307 0x3c020, 0xffffffff, 0x00010000,
308 0x3c024, 0xffffffff, 0x00030002,
309 0x3c028, 0xffffffff, 0x00040007,
310 0x3c02c, 0xffffffff, 0x00060005,
311 0x3c030, 0xffffffff, 0x00090008,
312 0x3c034, 0xffffffff, 0x00010000,
313 0x3c038, 0xffffffff, 0x00030002,
314 0x3c03c, 0xffffffff, 0x00040007,
315 0x3c040, 0xffffffff, 0x00060005,
316 0x3c044, 0xffffffff, 0x00090008,
317 0x3c048, 0xffffffff, 0x00010000,
318 0x3c04c, 0xffffffff, 0x00030002,
319 0x3c050, 0xffffffff, 0x00040007,
320 0x3c054, 0xffffffff, 0x00060005,
321 0x3c058, 0xffffffff, 0x00090008,
322 0x3c05c, 0xffffffff, 0x00010000,
323 0x3c060, 0xffffffff, 0x00030002,
324 0x3c064, 0xffffffff, 0x00040007,
325 0x3c068, 0xffffffff, 0x00060005,
326 0x3c06c, 0xffffffff, 0x00090008,
327 0x3c070, 0xffffffff, 0x00010000,
328 0x3c074, 0xffffffff, 0x00030002,
329 0x3c078, 0xffffffff, 0x00040007,
330 0x3c07c, 0xffffffff, 0x00060005,
331 0x3c080, 0xffffffff, 0x00090008,
332 0x3c084, 0xffffffff, 0x00010000,
333 0x3c088, 0xffffffff, 0x00030002,
334 0x3c08c, 0xffffffff, 0x00040007,
335 0x3c090, 0xffffffff, 0x00060005,
336 0x3c094, 0xffffffff, 0x00090008,
337 0x3c098, 0xffffffff, 0x00010000,
338 0x3c09c, 0xffffffff, 0x00030002,
339 0x3c0a0, 0xffffffff, 0x00040007,
340 0x3c0a4, 0xffffffff, 0x00060005,
341 0x3c0a8, 0xffffffff, 0x00090008,
342 0x3c0ac, 0xffffffff, 0x00010000,
343 0x3c0b0, 0xffffffff, 0x00030002,
344 0x3c0b4, 0xffffffff, 0x00040007,
345 0x3c0b8, 0xffffffff, 0x00060005,
346 0x3c0bc, 0xffffffff, 0x00090008,
347 0x3c000, 0xffffffff, 0x96e00200,
348 0x8708, 0xffffffff, 0x00900100,
349 0xc424, 0xffffffff, 0x0020003f,
350 0x38, 0xffffffff, 0x0140001c,
351 0x3c, 0x000f0000, 0x000f0000,
352 0x220, 0xffffffff, 0xC060000C,
353 0x224, 0xc0000fff, 0x00000100,
354 0xf90, 0xffffffff, 0x00000100,
355 0xf98, 0x00000101, 0x00000000,
356 0x20a8, 0xffffffff, 0x00000104,
357 0x55e4, 0xff000fff, 0x00000100,
358 0x30cc, 0xc0000fff, 0x00000104,
359 0xc1e4, 0x00000001, 0x00000001,
360 0xd00c, 0xff000ff0, 0x00000100,
361 0xd80c, 0xff000ff0, 0x00000100
362};
363
364static const u32 kalindi_golden_spm_registers[] =
365{
366 0x30800, 0xe0ffffff, 0xe0000000
367};
368
369static const u32 kalindi_golden_common_registers[] =
370{
371 0xc770, 0xffffffff, 0x00000800,
372 0xc774, 0xffffffff, 0x00000800,
373 0xc798, 0xffffffff, 0x00007fbf,
374 0xc79c, 0xffffffff, 0x00007faf
375};
376
377static const u32 kalindi_golden_registers[] =
378{
379 0x3c000, 0xffffdfff, 0x6e944040,
380 0x55e4, 0xff607fff, 0xfc000100,
381 0x3c220, 0xff000fff, 0x00000100,
382 0x3c224, 0xff000fff, 0x00000100,
383 0x3c200, 0xfffc0fff, 0x00000100,
384 0x6ed8, 0x00010101, 0x00010000,
385 0x9830, 0xffffffff, 0x00000000,
386 0x9834, 0xf00fffff, 0x00000400,
387 0x5bb0, 0x000000f0, 0x00000070,
388 0x5bc0, 0xf0311fff, 0x80300000,
389 0x98f8, 0x73773777, 0x12010001,
390 0x98fc, 0xffffffff, 0x00000010,
391 0x9b7c, 0x00ff0000, 0x00fc0000,
392 0x8030, 0x00001f0f, 0x0000100a,
393 0x2f48, 0x73773777, 0x12010001,
394 0x2408, 0x000fffff, 0x000c007f,
395 0x8a14, 0xf000003f, 0x00000007,
396 0x8b24, 0x3fff3fff, 0x00ffcfff,
397 0x30a04, 0x0000ff0f, 0x00000000,
398 0x28a4c, 0x07ffffff, 0x06000000,
399 0x4d8, 0x00000fff, 0x00000100,
400 0x3e78, 0x00000001, 0x00000002,
401 0xc768, 0x00000008, 0x00000008,
402 0x8c00, 0x000000ff, 0x00000003,
403 0x214f8, 0x01ff01ff, 0x00000002,
404 0x21498, 0x007ff800, 0x00200000,
405 0x2015c, 0xffffffff, 0x00000f40,
406 0x88c4, 0x001f3ae3, 0x00000082,
407 0x88d4, 0x0000001f, 0x00000010,
408 0x30934, 0xffffffff, 0x00000000
409};
410
411static const u32 kalindi_mgcg_cgcg_init[] =
412{
413 0xc420, 0xffffffff, 0xfffffffc,
414 0x30800, 0xffffffff, 0xe0000000,
415 0x3c2a0, 0xffffffff, 0x00000100,
416 0x3c208, 0xffffffff, 0x00000100,
417 0x3c2c0, 0xffffffff, 0x00000100,
418 0x3c2c8, 0xffffffff, 0x00000100,
419 0x3c2c4, 0xffffffff, 0x00000100,
420 0x55e4, 0xffffffff, 0x00600100,
421 0x3c280, 0xffffffff, 0x00000100,
422 0x3c214, 0xffffffff, 0x06000100,
423 0x3c220, 0xffffffff, 0x00000100,
424 0x3c218, 0xffffffff, 0x06000100,
425 0x3c204, 0xffffffff, 0x00000100,
426 0x3c2e0, 0xffffffff, 0x00000100,
427 0x3c224, 0xffffffff, 0x00000100,
428 0x3c200, 0xffffffff, 0x00000100,
429 0x3c230, 0xffffffff, 0x00000100,
430 0x3c234, 0xffffffff, 0x00000100,
431 0x3c250, 0xffffffff, 0x00000100,
432 0x3c254, 0xffffffff, 0x00000100,
433 0x3c258, 0xffffffff, 0x00000100,
434 0x3c25c, 0xffffffff, 0x00000100,
435 0x3c260, 0xffffffff, 0x00000100,
436 0x3c27c, 0xffffffff, 0x00000100,
437 0x3c278, 0xffffffff, 0x00000100,
438 0x3c210, 0xffffffff, 0x06000100,
439 0x3c290, 0xffffffff, 0x00000100,
440 0x3c274, 0xffffffff, 0x00000100,
441 0x3c2b4, 0xffffffff, 0x00000100,
442 0x3c2b0, 0xffffffff, 0x00000100,
443 0x3c270, 0xffffffff, 0x00000100,
444 0x30800, 0xffffffff, 0xe0000000,
445 0x3c020, 0xffffffff, 0x00010000,
446 0x3c024, 0xffffffff, 0x00030002,
447 0x3c028, 0xffffffff, 0x00040007,
448 0x3c02c, 0xffffffff, 0x00060005,
449 0x3c030, 0xffffffff, 0x00090008,
450 0x3c034, 0xffffffff, 0x00010000,
451 0x3c038, 0xffffffff, 0x00030002,
452 0x3c03c, 0xffffffff, 0x00040007,
453 0x3c040, 0xffffffff, 0x00060005,
454 0x3c044, 0xffffffff, 0x00090008,
455 0x3c000, 0xffffffff, 0x96e00200,
456 0x8708, 0xffffffff, 0x00900100,
457 0xc424, 0xffffffff, 0x0020003f,
458 0x38, 0xffffffff, 0x0140001c,
459 0x3c, 0x000f0000, 0x000f0000,
460 0x220, 0xffffffff, 0xC060000C,
461 0x224, 0xc0000fff, 0x00000100,
462 0x20a8, 0xffffffff, 0x00000104,
463 0x55e4, 0xff000fff, 0x00000100,
464 0x30cc, 0xc0000fff, 0x00000104,
465 0xc1e4, 0x00000001, 0x00000001,
466 0xd00c, 0xff000ff0, 0x00000100,
467 0xd80c, 0xff000ff0, 0x00000100
468};
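
/*
 * Program the per-ASIC "golden" (recommended power-on) register settings.
 * Note the internal codenames used above: KAVERI uses the "spectre" tables
 * and KABINI the "kalindi" tables.
 */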
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

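/*
 * MC "io debug" register pairs: ci_mc_load_microcode() below writes each
 * entry as an {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair before
 * loading the MC ucode.
 */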
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

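/*
 * Note: SRBM_GFX_CNTL (written by cik_srbm_select() above) is a single
 * global selector register, so callers are expected to serialize the
 * select/program sequence themselves.
 */
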
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

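/*
 * Firmware images follow the "radeon/<CHIP>_<block>.bin" naming convention
 * declared via MODULE_FIRMWARE() at the top of this file.
 */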
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		/* also drop the MEC, RLC, SDMA and MC images on failure */
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

874/*
875 * Core functions
876 */
877/**
878 * cik_tiling_mode_table_init - init the hw tiling table
879 *
880 * @rdev: radeon_device pointer
881 *
882 * Starting with SI, the tiling setup is done globally in a
883 * set of 32 tiling modes. Rather than selecting each set of
884 * parameters per surface as on older asics, we just select
885 * which index in the tiling table we want to use, and the
886 * surface uses those parameters (CIK).
887 */
888static void cik_tiling_mode_table_init(struct radeon_device *rdev)
889{
890 const u32 num_tile_mode_states = 32;
891 const u32 num_secondary_tile_mode_states = 16;
892 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
893 u32 num_pipe_configs;
894 u32 num_rbs = rdev->config.cik.max_backends_per_se *
895 rdev->config.cik.max_shader_engines;
896
897 switch (rdev->config.cik.mem_row_size_in_kb) {
898 case 1:
899 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
900 break;
901 case 2:
902 default:
903 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
904 break;
905 case 4:
906 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
907 break;
908 }
909
910 num_pipe_configs = rdev->config.cik.max_tile_pipes;
911 if (num_pipe_configs > 8)
912 num_pipe_configs = 8; /* ??? */
913
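	/*
	 * Each branch below fills in the 32 GB_TILE_MODE and 16
	 * GB_MACROTILE_MODE entries for the detected pipe/RB configuration.
	 * The tile modes are also cached in rdev->config.cik.tile_mode_array[],
	 * presumably so userspace can query them via the radeon info ioctl.
	 */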
914 if (num_pipe_configs == 8) {
915 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
916 switch (reg_offset) {
917 case 0:
918 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
919 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
920 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
921 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
922 break;
923 case 1:
924 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
925 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
926 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
927 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
928 break;
929 case 2:
930 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
931 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
932 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
933 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
934 break;
935 case 3:
936 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
937 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
939 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
940 break;
941 case 4:
942 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
943 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
944 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
945 TILE_SPLIT(split_equal_to_row_size));
946 break;
947 case 5:
948 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
949 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
950 break;
951 case 6:
952 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
953 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
954 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
955 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
956 break;
957 case 7:
958 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
959 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
960 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
961 TILE_SPLIT(split_equal_to_row_size));
962 break;
963 case 8:
964 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
965 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
966 break;
967 case 9:
968 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
969 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
970 break;
971 case 10:
972 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
973 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
975 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
976 break;
977 case 11:
978 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
979 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
980 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
981 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
982 break;
983 case 12:
984 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
985 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
986 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
987 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
988 break;
989 case 13:
990 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
991 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
992 break;
993 case 14:
994 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
995 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
996 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
997 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
998 break;
999 case 16:
1000 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1001 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1002 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1003 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1004 break;
1005 case 17:
1006 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1007 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1008 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1009 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1010 break;
1011 case 27:
1012 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1013 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1014 break;
1015 case 28:
1016 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1017 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1018 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1019 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1020 break;
1021 case 29:
1022 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1023 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1024 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1025 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1026 break;
1027 case 30:
1028 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1029 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1030 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1031 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1032 break;
1033 default:
1034 gb_tile_moden = 0;
1035 break;
1036 }
1037 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1038 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1039 }
1040 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1041 switch (reg_offset) {
1042 case 0:
1043 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1044 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1045 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1046 NUM_BANKS(ADDR_SURF_16_BANK));
1047 break;
1048 case 1:
1049 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1050 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1051 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1052 NUM_BANKS(ADDR_SURF_16_BANK));
1053 break;
1054 case 2:
1055 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1056 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1057 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1058 NUM_BANKS(ADDR_SURF_16_BANK));
1059 break;
1060 case 3:
1061 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1062 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1063 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1064 NUM_BANKS(ADDR_SURF_16_BANK));
1065 break;
1066 case 4:
1067 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1068 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1069 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1070 NUM_BANKS(ADDR_SURF_8_BANK));
1071 break;
1072 case 5:
1073 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1074 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1075 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1076 NUM_BANKS(ADDR_SURF_4_BANK));
1077 break;
1078 case 6:
1079 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1080 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1081 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1082 NUM_BANKS(ADDR_SURF_2_BANK));
1083 break;
1084 case 8:
1085 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1086 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1087 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1088 NUM_BANKS(ADDR_SURF_16_BANK));
1089 break;
1090 case 9:
1091 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1092 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1093 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1094 NUM_BANKS(ADDR_SURF_16_BANK));
1095 break;
1096 case 10:
1097 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1098 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1099 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1100 NUM_BANKS(ADDR_SURF_16_BANK));
1101 break;
1102 case 11:
1103 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1104 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1105 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1106 NUM_BANKS(ADDR_SURF_16_BANK));
1107 break;
1108 case 12:
1109 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1110 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1111 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1112 NUM_BANKS(ADDR_SURF_8_BANK));
1113 break;
1114 case 13:
1115 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1116 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1117 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1118 NUM_BANKS(ADDR_SURF_4_BANK));
1119 break;
1120 case 14:
1121 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1122 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1123 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1124 NUM_BANKS(ADDR_SURF_2_BANK));
1125 break;
1126 default:
1127 gb_tile_moden = 0;
1128 break;
1129 }
1130 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1131 }
1132 } else if (num_pipe_configs == 4) {
1133 if (num_rbs == 4) {
1134 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1135 switch (reg_offset) {
1136 case 0:
1137 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1138 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1139 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1140 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1141 break;
1142 case 1:
1143 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1144 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1145 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1146 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1147 break;
1148 case 2:
1149 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1150 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1151 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1152 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1153 break;
1154 case 3:
1155 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1156 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1157 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1158 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1159 break;
1160 case 4:
1161 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1162 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1163 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1164 TILE_SPLIT(split_equal_to_row_size));
1165 break;
1166 case 5:
1167 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1168 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1169 break;
1170 case 6:
1171 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1172 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1173 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1174 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1175 break;
1176 case 7:
1177 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1178 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1179 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1180 TILE_SPLIT(split_equal_to_row_size));
1181 break;
1182 case 8:
1183 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1184 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1185 break;
1186 case 9:
1187 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1188 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1189 break;
1190 case 10:
1191 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1192 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1193 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1195 break;
1196 case 11:
1197 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1198 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1199 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1201 break;
1202 case 12:
1203 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1204 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1205 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1207 break;
1208 case 13:
1209 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1210 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1211 break;
1212 case 14:
1213 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1214 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1215 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1216 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1217 break;
1218 case 16:
1219 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1220 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1221 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1223 break;
1224 case 17:
1225 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1226 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1227 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1228 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1229 break;
1230 case 27:
1231 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1232 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1233 break;
1234 case 28:
1235 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1236 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1237 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1239 break;
1240 case 29:
1241 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1242 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1243 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1245 break;
1246 case 30:
1247 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1248 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1249 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1251 break;
1252 default:
1253 gb_tile_moden = 0;
1254 break;
1255 }
1256 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1257 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1258 }
1259 } else if (num_rbs < 4) {
1260 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1261 switch (reg_offset) {
1262 case 0:
1263 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1264 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1265 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1266 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1267 break;
1268 case 1:
1269 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1270 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1271 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1272 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1273 break;
1274 case 2:
1275 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1276 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1277 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1278 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1279 break;
1280 case 3:
1281 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1282 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1283 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1284 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1285 break;
1286 case 4:
1287 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1288 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1289 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1290 TILE_SPLIT(split_equal_to_row_size));
1291 break;
1292 case 5:
1293 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1295 break;
1296 case 6:
1297 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1299 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1301 break;
1302 case 7:
1303 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1304 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1305 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1306 TILE_SPLIT(split_equal_to_row_size));
1307 break;
1308 case 8:
1309 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1310 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1311 break;
1312 case 9:
1313 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1314 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1315 break;
1316 case 10:
1317 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1318 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1319 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1321 break;
1322 case 11:
1323 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1324 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1325 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1327 break;
1328 case 12:
1329 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1330 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1331 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1333 break;
1334 case 13:
1335 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1336 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1337 break;
1338 case 14:
1339 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1340 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1341 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1343 break;
1344 case 16:
1345 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1346 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1347 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1349 break;
1350 case 17:
1351 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1352 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1353 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1355 break;
1356 case 27:
1357 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1358 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1359 break;
1360 case 28:
1361 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1362 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1363 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1365 break;
1366 case 29:
1367 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1368 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1369 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1371 break;
1372 case 30:
1373 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1374 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1375 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1377 break;
1378 default:
1379 gb_tile_moden = 0;
1380 break;
1381 }
1382 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1383 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1384 }
1385 }
1386 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1387 switch (reg_offset) {
1388 case 0:
1389 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1392 NUM_BANKS(ADDR_SURF_16_BANK));
1393 break;
1394 case 1:
1395 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1398 NUM_BANKS(ADDR_SURF_16_BANK));
1399 break;
1400 case 2:
1401 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1402 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1403 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1404 NUM_BANKS(ADDR_SURF_16_BANK));
1405 break;
1406 case 3:
1407 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1410 NUM_BANKS(ADDR_SURF_16_BANK));
1411 break;
1412 case 4:
1413 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1414 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1415 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1416 NUM_BANKS(ADDR_SURF_16_BANK));
1417 break;
1418 case 5:
1419 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1420 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1421 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1422 NUM_BANKS(ADDR_SURF_8_BANK));
1423 break;
1424 case 6:
1425 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1426 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1427 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1428 NUM_BANKS(ADDR_SURF_4_BANK));
1429 break;
1430 case 8:
1431 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1432 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1433 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1434 NUM_BANKS(ADDR_SURF_16_BANK));
1435 break;
1436 case 9:
1437 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1438 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1439 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1440 NUM_BANKS(ADDR_SURF_16_BANK));
1441 break;
1442 case 10:
1443 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1444 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1445 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1446 NUM_BANKS(ADDR_SURF_16_BANK));
1447 break;
1448 case 11:
1449 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1450 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1451 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1452 NUM_BANKS(ADDR_SURF_16_BANK));
1453 break;
1454 case 12:
1455 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1456 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1457 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1458 NUM_BANKS(ADDR_SURF_16_BANK));
1459 break;
1460 case 13:
1461 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1462 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1463 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1464 NUM_BANKS(ADDR_SURF_8_BANK));
1465 break;
1466 case 14:
1467 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1468 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1469 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1470 NUM_BANKS(ADDR_SURF_4_BANK));
1471 break;
1472 default:
1473 gb_tile_moden = 0;
1474 break;
1475 }
1476 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1477 }
1478 } else if (num_pipe_configs == 2) {
1479 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1480 switch (reg_offset) {
1481 case 0:
1482 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1483 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1484 PIPE_CONFIG(ADDR_SURF_P2) |
1485 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1486 break;
1487 case 1:
1488 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1489 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1490 PIPE_CONFIG(ADDR_SURF_P2) |
1491 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1492 break;
1493 case 2:
1494 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1495 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1496 PIPE_CONFIG(ADDR_SURF_P2) |
1497 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1498 break;
1499 case 3:
1500 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1501 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1502 PIPE_CONFIG(ADDR_SURF_P2) |
1503 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1504 break;
1505 case 4:
1506 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1507 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1508 PIPE_CONFIG(ADDR_SURF_P2) |
1509 TILE_SPLIT(split_equal_to_row_size));
1510 break;
1511 case 5:
1512 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1513 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1514 break;
1515 case 6:
1516 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1518 PIPE_CONFIG(ADDR_SURF_P2) |
1519 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1520 break;
1521 case 7:
1522 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1524 PIPE_CONFIG(ADDR_SURF_P2) |
1525 TILE_SPLIT(split_equal_to_row_size));
1526 break;
1527 case 8:
1528 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1529 break;
1530 case 9:
1531 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1532 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1533 break;
1534 case 10:
1535 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1536 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1537 PIPE_CONFIG(ADDR_SURF_P2) |
1538 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1539 break;
1540 case 11:
1541 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1542 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1543 PIPE_CONFIG(ADDR_SURF_P2) |
1544 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1545 break;
1546 case 12:
1547 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1548 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1549 PIPE_CONFIG(ADDR_SURF_P2) |
1550 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1551 break;
1552 case 13:
1553 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1554 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1555 break;
1556 case 14:
1557 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1558 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1559 PIPE_CONFIG(ADDR_SURF_P2) |
1560 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1561 break;
1562 case 16:
1563 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1564 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1565 PIPE_CONFIG(ADDR_SURF_P2) |
1566 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1567 break;
1568 case 17:
1569 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1570 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1571 PIPE_CONFIG(ADDR_SURF_P2) |
1572 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1573 break;
1574 case 27:
1575 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1576 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1577 break;
1578 case 28:
1579 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1580 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1581 PIPE_CONFIG(ADDR_SURF_P2) |
1582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1583 break;
1584 case 29:
1585 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1586 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1587 PIPE_CONFIG(ADDR_SURF_P2) |
1588 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1589 break;
1590 case 30:
1591 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1592 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1593 PIPE_CONFIG(ADDR_SURF_P2) |
1594 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1595 break;
1596 default:
1597 gb_tile_moden = 0;
1598 break;
1599 }
Alex Deucher39aee492013-04-10 13:41:25 -04001600 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
Alex Deucher8cc1a532013-04-09 12:41:24 -04001601 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1602 }
1603 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1604 switch (reg_offset) {
1605 case 0:
1606 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1607 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1608 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1609 NUM_BANKS(ADDR_SURF_16_BANK));
1610 break;
1611 case 1:
1612 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1613 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1614 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1615 NUM_BANKS(ADDR_SURF_16_BANK));
1616 break;
1617 case 2:
1618 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1619 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1620 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1621 NUM_BANKS(ADDR_SURF_16_BANK));
1622 break;
1623 case 3:
1624 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1625 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1626 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1627 NUM_BANKS(ADDR_SURF_16_BANK));
1628 break;
1629 case 4:
1630 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1631 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1632 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1633 NUM_BANKS(ADDR_SURF_16_BANK));
1634 break;
1635 case 5:
1636 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1637 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1638 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1639 NUM_BANKS(ADDR_SURF_16_BANK));
1640 break;
1641 case 6:
1642 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1643 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1644 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1645 NUM_BANKS(ADDR_SURF_8_BANK));
1646 break;
1647 case 8:
1648 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1649 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1650 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1651 NUM_BANKS(ADDR_SURF_16_BANK));
1652 break;
1653 case 9:
1654 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1655 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1656 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1657 NUM_BANKS(ADDR_SURF_16_BANK));
1658 break;
1659 case 10:
1660 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1661 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1662 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1663 NUM_BANKS(ADDR_SURF_16_BANK));
1664 break;
1665 case 11:
1666 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1667 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1668 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1669 NUM_BANKS(ADDR_SURF_16_BANK));
1670 break;
1671 case 12:
1672 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1673 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1674 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1675 NUM_BANKS(ADDR_SURF_16_BANK));
1676 break;
1677 case 13:
1678 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1679 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1680 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1681 NUM_BANKS(ADDR_SURF_16_BANK));
1682 break;
1683 case 14:
1684 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1685 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1686 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1687 NUM_BANKS(ADDR_SURF_8_BANK));
1688 break;
1689 default:
1690 gb_tile_moden = 0;
1691 break;
1692 }
1693 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1694 }
1695 } else
1696 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1697}
1698
1699/**
1700 * cik_select_se_sh - select which SE, SH to address
1701 *
1702 * @rdev: radeon_device pointer
1703 * @se_num: shader engine to address
1704 * @sh_num: sh block to address
1705 *
1706 * Select which SE, SH combinations to address. Certain
1707 * registers are instanced per SE or SH. 0xffffffff means
1708 * broadcast to all SEs or SHs (CIK).
1709 */
1710static void cik_select_se_sh(struct radeon_device *rdev,
1711 u32 se_num, u32 sh_num)
1712{
1713 u32 data = INSTANCE_BROADCAST_WRITES;
1714
1715 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
Alex Deucherb0fe3d32013-04-18 16:25:47 -04001716 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
Alex Deucher8cc1a532013-04-09 12:41:24 -04001717 else if (se_num == 0xffffffff)
1718 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1719 else if (sh_num == 0xffffffff)
1720 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1721 else
1722 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1723 WREG32(GRBM_GFX_INDEX, data);
1724}
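/*
 * Illustrative usage sketch (not driver code): instanced registers are
 * read by narrowing GRBM_GFX_INDEX to a single SE/SH pair and restoring
 * broadcast afterwards, e.g.
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	val = RREG32(CC_RB_BACKEND_DISABLE);        (per-SE/SH instance)
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *
 * cik_setup_rb() below follows this pattern.
 */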
1725
1726/**
1727 * cik_create_bitmask - create a bitmask
1728 *
1729 * @bit_width: length of the mask
1730 *
1731 * create a variable length bit mask (CIK).
1732 * Returns the bitmask.
1733 */
1734static u32 cik_create_bitmask(u32 bit_width)
1735{
1736 u32 i, mask = 0;
1737
1738 for (i = 0; i < bit_width; i++) {
1739 mask <<= 1;
1740 mask |= 1;
1741 }
1742 return mask;
1743}
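/*
 * Illustrative note (not driver code): for any bit_width up to 32 the
 * loop above yields the same result as the closed form
 *
 *	mask = bit_width < 32 ? (1u << bit_width) - 1 : 0xffffffff;
 *
 * the loop form simply avoids the undefined 32-bit shift.
 */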
1744
1745/**
 1746 * cik_get_rb_disabled - calculate the bitmask of disabled RBs
1747 *
1748 * @rdev: radeon_device pointer
1749 * @max_rb_num: max RBs (render backends) for the asic
1750 * @se_num: number of SEs (shader engines) for the asic
1751 * @sh_per_se: number of SH blocks per SE for the asic
1752 *
1753 * Calculates the bitmask of disabled RBs (CIK).
1754 * Returns the disabled RB bitmask.
1755 */
1756static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1757 u32 max_rb_num, u32 se_num,
1758 u32 sh_per_se)
1759{
1760 u32 data, mask;
1761
1762 data = RREG32(CC_RB_BACKEND_DISABLE);
1763 if (data & 1)
1764 data &= BACKEND_DISABLE_MASK;
1765 else
1766 data = 0;
1767 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1768
1769 data >>= BACKEND_DISABLE_SHIFT;
1770
1771 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1772
1773 return data & mask;
1774}
1775
1776/**
1777 * cik_setup_rb - setup the RBs on the asic
1778 *
1779 * @rdev: radeon_device pointer
1780 * @se_num: number of SEs (shader engines) for the asic
1781 * @sh_per_se: number of SH blocks per SE for the asic
1782 * @max_rb_num: max RBs (render backends) for the asic
1783 *
1784 * Configures per-SE/SH RB registers (CIK).
1785 */
1786static void cik_setup_rb(struct radeon_device *rdev,
1787 u32 se_num, u32 sh_per_se,
1788 u32 max_rb_num)
1789{
1790 int i, j;
1791 u32 data, mask;
1792 u32 disabled_rbs = 0;
1793 u32 enabled_rbs = 0;
1794
1795 for (i = 0; i < se_num; i++) {
1796 for (j = 0; j < sh_per_se; j++) {
1797 cik_select_se_sh(rdev, i, j);
1798 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1799 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1800 }
1801 }
1802 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1803
1804 mask = 1;
1805 for (i = 0; i < max_rb_num; i++) {
1806 if (!(disabled_rbs & mask))
1807 enabled_rbs |= mask;
1808 mask <<= 1;
1809 }
1810
1811 for (i = 0; i < se_num; i++) {
1812 cik_select_se_sh(rdev, i, 0xffffffff);
1813 data = 0;
1814 for (j = 0; j < sh_per_se; j++) {
1815 switch (enabled_rbs & 3) {
1816 case 1:
1817 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1818 break;
1819 case 2:
1820 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1821 break;
1822 case 3:
1823 default:
1824 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1825 break;
1826 }
1827 enabled_rbs >>= 2;
1828 }
1829 WREG32(PA_SC_RASTER_CONFIG, data);
1830 }
1831 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1832}
1833
1834/**
1835 * cik_gpu_init - setup the 3D engine
1836 *
1837 * @rdev: radeon_device pointer
1838 *
1839 * Configures the 3D engine and tiling configuration
1840 * registers so that the 3D engine is usable.
1841 */
1842static void cik_gpu_init(struct radeon_device *rdev)
1843{
1844 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1845 u32 mc_shared_chmap, mc_arb_ramcfg;
1846 u32 hdp_host_path_cntl;
1847 u32 tmp;
1848 int i, j;
1849
1850 switch (rdev->family) {
1851 case CHIP_BONAIRE:
1852 rdev->config.cik.max_shader_engines = 2;
1853 rdev->config.cik.max_tile_pipes = 4;
1854 rdev->config.cik.max_cu_per_sh = 7;
1855 rdev->config.cik.max_sh_per_se = 1;
1856 rdev->config.cik.max_backends_per_se = 2;
1857 rdev->config.cik.max_texture_channel_caches = 4;
1858 rdev->config.cik.max_gprs = 256;
1859 rdev->config.cik.max_gs_threads = 32;
1860 rdev->config.cik.max_hw_contexts = 8;
1861
1862 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1863 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1864 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1865 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1866 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1867 break;
1868 case CHIP_KAVERI:
1869 /* TODO */
1870 break;
1871 case CHIP_KABINI:
1872 default:
1873 rdev->config.cik.max_shader_engines = 1;
1874 rdev->config.cik.max_tile_pipes = 2;
1875 rdev->config.cik.max_cu_per_sh = 2;
1876 rdev->config.cik.max_sh_per_se = 1;
1877 rdev->config.cik.max_backends_per_se = 1;
1878 rdev->config.cik.max_texture_channel_caches = 2;
1879 rdev->config.cik.max_gprs = 256;
1880 rdev->config.cik.max_gs_threads = 16;
1881 rdev->config.cik.max_hw_contexts = 8;
1882
1883 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1884 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1885 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1886 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1887 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1888 break;
1889 }
1890
1891 /* Initialize HDP */
1892 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1893 WREG32((0x2c14 + j), 0x00000000);
1894 WREG32((0x2c18 + j), 0x00000000);
1895 WREG32((0x2c1c + j), 0x00000000);
1896 WREG32((0x2c20 + j), 0x00000000);
1897 WREG32((0x2c24 + j), 0x00000000);
1898 }
1899
1900 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1901
1902 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1903
1904 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1905 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1906
1907 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1908 rdev->config.cik.mem_max_burst_length_bytes = 256;
1909 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1910 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1911 if (rdev->config.cik.mem_row_size_in_kb > 4)
1912 rdev->config.cik.mem_row_size_in_kb = 4;
1913 /* XXX use MC settings? */
1914 rdev->config.cik.shader_engine_tile_size = 32;
1915 rdev->config.cik.num_gpus = 1;
1916 rdev->config.cik.multi_gpu_tile_size = 64;
1917
1918 /* fix up row size */
1919 gb_addr_config &= ~ROW_SIZE_MASK;
1920 switch (rdev->config.cik.mem_row_size_in_kb) {
1921 case 1:
1922 default:
1923 gb_addr_config |= ROW_SIZE(0);
1924 break;
1925 case 2:
1926 gb_addr_config |= ROW_SIZE(1);
1927 break;
1928 case 4:
1929 gb_addr_config |= ROW_SIZE(2);
1930 break;
1931 }
1932
1933 /* setup tiling info dword. gb_addr_config is not adequate since it does
1934 * not have bank info, so create a custom tiling dword.
1935 * bits 3:0 num_pipes
1936 * bits 7:4 num_banks
1937 * bits 11:8 group_size
1938 * bits 15:12 row_size
1939 */
1940 rdev->config.cik.tile_config = 0;
1941 switch (rdev->config.cik.num_tile_pipes) {
1942 case 1:
1943 rdev->config.cik.tile_config |= (0 << 0);
1944 break;
1945 case 2:
1946 rdev->config.cik.tile_config |= (1 << 0);
1947 break;
1948 case 4:
1949 rdev->config.cik.tile_config |= (2 << 0);
1950 break;
1951 case 8:
1952 default:
1953 /* XXX what about 12? */
1954 rdev->config.cik.tile_config |= (3 << 0);
1955 break;
1956 }
1957 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1958 rdev->config.cik.tile_config |= 1 << 4;
1959 else
1960 rdev->config.cik.tile_config |= 0 << 4;
1961 rdev->config.cik.tile_config |=
1962 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1963 rdev->config.cik.tile_config |=
1964 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
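	/*
	 * Illustrative decode sketch (assumed field names, not driver code):
	 * given the layout documented above, the fields can be pulled back
	 * out of tile_config with
	 *
	 *	pipes_field      = tile_config & 0xf;        (log2 of num_pipes)
	 *	banks_field      = (tile_config >> 4) & 0xf;
	 *	group_size_field = (tile_config >> 8) & 0xf;
	 *	row_size_field   = (tile_config >> 12) & 0xf;
	 */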
1965
1966 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1967 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1968 WREG32(DMIF_ADDR_CALC, gb_addr_config);
Alex Deucher21a93e12013-04-09 12:47:11 -04001969 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1970 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
Christian König87167bb2013-04-09 13:39:21 -04001971 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1972 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1973 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
Alex Deucher8cc1a532013-04-09 12:41:24 -04001974
1975 cik_tiling_mode_table_init(rdev);
1976
1977 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1978 rdev->config.cik.max_sh_per_se,
1979 rdev->config.cik.max_backends_per_se);
1980
1981 /* set HW defaults for 3D engine */
1982 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1983
1984 WREG32(SX_DEBUG_1, 0x20);
1985
1986 WREG32(TA_CNTL_AUX, 0x00010000);
1987
1988 tmp = RREG32(SPI_CONFIG_CNTL);
1989 tmp |= 0x03000000;
1990 WREG32(SPI_CONFIG_CNTL, tmp);
1991
1992 WREG32(SQ_CONFIG, 1);
1993
1994 WREG32(DB_DEBUG, 0);
1995
1996 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1997 tmp |= 0x00000400;
1998 WREG32(DB_DEBUG2, tmp);
1999
2000 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2001 tmp |= 0x00020200;
2002 WREG32(DB_DEBUG3, tmp);
2003
2004 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2005 tmp |= 0x00018208;
2006 WREG32(CB_HW_CONTROL, tmp);
2007
2008 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2009
2010 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2011 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2012 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2013 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2014
2015 WREG32(VGT_NUM_INSTANCES, 1);
2016
2017 WREG32(CP_PERFMON_CNTL, 0);
2018
2019 WREG32(SQ_CONFIG, 0);
2020
2021 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2022 FORCE_EOV_MAX_REZ_CNT(255)));
2023
2024 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2025 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2026
2027 WREG32(VGT_GS_VERTEX_REUSE, 16);
2028 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2029
2030 tmp = RREG32(HDP_MISC_CNTL);
2031 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2032 WREG32(HDP_MISC_CNTL, tmp);
2033
2034 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2035 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2036
2037 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2038 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2039
2040 udelay(50);
2041}
2042
Alex Deucher841cf442012-12-18 21:47:44 -05002043/*
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002044 * GPU scratch registers helpers function.
2045 */
2046/**
2047 * cik_scratch_init - setup driver info for CP scratch regs
2048 *
2049 * @rdev: radeon_device pointer
2050 *
2051 * Set up the number and offset of the CP scratch registers.
 2052 * NOTE: use of CP scratch registers is a legacy interface and
2053 * is not used by default on newer asics (r6xx+). On newer asics,
2054 * memory buffers are used for fences rather than scratch regs.
2055 */
2056static void cik_scratch_init(struct radeon_device *rdev)
2057{
2058 int i;
2059
2060 rdev->scratch.num_reg = 7;
2061 rdev->scratch.reg_base = SCRATCH_REG0;
2062 for (i = 0; i < rdev->scratch.num_reg; i++) {
2063 rdev->scratch.free[i] = true;
2064 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2065 }
2066}
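/*
 * Illustrative example (not driver code): with the setup above, scratch
 * register i decodes to the MMIO address SCRATCH_REG0 + i * 4, e.g.
 * rdev->scratch.reg[3] == SCRATCH_REG0 + 12.  radeon_scratch_get() hands
 * these registers out to callers such as cik_ring_test() below.
 */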
2067
2068/**
Alex Deucherfbc832c2012-07-20 14:41:35 -04002069 * cik_ring_test - basic gfx ring test
2070 *
2071 * @rdev: radeon_device pointer
2072 * @ring: radeon_ring structure holding ring information
2073 *
2074 * Allocate a scratch register and write to it using the gfx ring (CIK).
2075 * Provides a basic gfx ring test to verify that the ring is working.
2076 * Used by cik_cp_gfx_resume();
2077 * Returns 0 on success, error on failure.
2078 */
2079int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2080{
2081 uint32_t scratch;
2082 uint32_t tmp = 0;
2083 unsigned i;
2084 int r;
2085
2086 r = radeon_scratch_get(rdev, &scratch);
2087 if (r) {
2088 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2089 return r;
2090 }
2091 WREG32(scratch, 0xCAFEDEAD);
2092 r = radeon_ring_lock(rdev, ring, 3);
2093 if (r) {
2094 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2095 radeon_scratch_free(rdev, scratch);
2096 return r;
2097 }
2098 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2099 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2100 radeon_ring_write(ring, 0xDEADBEEF);
2101 radeon_ring_unlock_commit(rdev, ring);
Alex Deucher963e81f2013-06-26 17:37:11 -04002102
Alex Deucherfbc832c2012-07-20 14:41:35 -04002103 for (i = 0; i < rdev->usec_timeout; i++) {
2104 tmp = RREG32(scratch);
2105 if (tmp == 0xDEADBEEF)
2106 break;
2107 DRM_UDELAY(1);
2108 }
2109 if (i < rdev->usec_timeout) {
2110 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2111 } else {
2112 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2113 ring->idx, scratch, tmp);
2114 r = -EINVAL;
2115 }
2116 radeon_scratch_free(rdev, scratch);
2117 return r;
2118}
2119
2120/**
Alex Deucherb07fdd32013-04-11 09:36:17 -04002121 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002122 *
2123 * @rdev: radeon_device pointer
2124 * @fence: radeon fence object
2125 *
 2126 * Emits a fence sequence number on the gfx ring and flushes
2127 * GPU caches.
2128 */
Alex Deucherb07fdd32013-04-11 09:36:17 -04002129void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2130 struct radeon_fence *fence)
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002131{
2132 struct radeon_ring *ring = &rdev->ring[fence->ring];
2133 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2134
2135 /* EVENT_WRITE_EOP - flush caches, send int */
2136 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2137 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2138 EOP_TC_ACTION_EN |
2139 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2140 EVENT_INDEX(5)));
2141 radeon_ring_write(ring, addr & 0xfffffffc);
2142 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2143 radeon_ring_write(ring, fence->seq);
2144 radeon_ring_write(ring, 0);
2145 /* HDP flush */
2146 /* We should be using the new WAIT_REG_MEM special op packet here
2147 * but it causes the CP to hang
2148 */
2149 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2150 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2151 WRITE_DATA_DST_SEL(0)));
2152 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2153 radeon_ring_write(ring, 0);
2154 radeon_ring_write(ring, 0);
2155}
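/*
 * Informal sketch of the 6-dword EVENT_WRITE_EOP packet emitted above
 * (not driver code):
 *	DW0: PACKET3(PACKET3_EVENT_WRITE_EOP, 4)
 *	DW1: cache actions and event type/index
 *	DW2: fence address, lower 32 bits (4-byte aligned)
 *	DW3: upper address bits | DATA_SEL(1) | INT_SEL(2)
 *	DW4: fence sequence number
 *	DW5: upper data word (unused here, written as 0)
 * DATA_SEL/INT_SEL ask the CP to write the 32-bit sequence number and
 * then raise an interrupt once the write has completed.
 */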
2156
Alex Deucherb07fdd32013-04-11 09:36:17 -04002157/**
2158 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2159 *
2160 * @rdev: radeon_device pointer
2161 * @fence: radeon fence object
2162 *
 2163 * Emits a fence sequence number on the compute ring and flushes
2164 * GPU caches.
2165 */
2166void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2167 struct radeon_fence *fence)
2168{
2169 struct radeon_ring *ring = &rdev->ring[fence->ring];
2170 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2171
2172 /* RELEASE_MEM - flush caches, send int */
2173 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2174 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2175 EOP_TC_ACTION_EN |
2176 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2177 EVENT_INDEX(5)));
2178 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2179 radeon_ring_write(ring, addr & 0xfffffffc);
2180 radeon_ring_write(ring, upper_32_bits(addr));
2181 radeon_ring_write(ring, fence->seq);
2182 radeon_ring_write(ring, 0);
2183 /* HDP flush */
2184 /* We should be using the new WAIT_REG_MEM special op packet here
2185 * but it causes the CP to hang
2186 */
2187 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2188 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2189 WRITE_DATA_DST_SEL(0)));
2190 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2191 radeon_ring_write(ring, 0);
2192 radeon_ring_write(ring, 0);
2193}
2194
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002195void cik_semaphore_ring_emit(struct radeon_device *rdev,
2196 struct radeon_ring *ring,
2197 struct radeon_semaphore *semaphore,
2198 bool emit_wait)
2199{
2200 uint64_t addr = semaphore->gpu_addr;
2201 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2202
2203 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2204 radeon_ring_write(ring, addr & 0xffffffff);
2205 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2206}
2207
2208/*
2209 * IB stuff
2210 */
2211/**
2212 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2213 *
2214 * @rdev: radeon_device pointer
2215 * @ib: radeon indirect buffer object
2216 *
 2217 * Emits a DE (drawing engine) or CE (constant engine) IB
2218 * on the gfx ring. IBs are usually generated by userspace
2219 * acceleration drivers and submitted to the kernel for
 2220 * scheduling on the ring. This function schedules the IB
2221 * on the gfx ring for execution by the GPU.
2222 */
2223void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2224{
2225 struct radeon_ring *ring = &rdev->ring[ib->ring];
2226 u32 header, control = INDIRECT_BUFFER_VALID;
2227
2228 if (ib->is_const_ib) {
2229 /* set switch buffer packet before const IB */
2230 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2231 radeon_ring_write(ring, 0);
2232
2233 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2234 } else {
2235 u32 next_rptr;
2236 if (ring->rptr_save_reg) {
2237 next_rptr = ring->wptr + 3 + 4;
2238 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2239 radeon_ring_write(ring, ((ring->rptr_save_reg -
2240 PACKET3_SET_UCONFIG_REG_START) >> 2));
2241 radeon_ring_write(ring, next_rptr);
2242 } else if (rdev->wb.enabled) {
2243 next_rptr = ring->wptr + 5 + 4;
2244 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2245 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2246 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2247 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2248 radeon_ring_write(ring, next_rptr);
2249 }
2250
2251 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2252 }
2253
2254 control |= ib->length_dw |
2255 (ib->vm ? (ib->vm->id << 24) : 0);
2256
2257 radeon_ring_write(ring, header);
2258 radeon_ring_write(ring,
2259#ifdef __BIG_ENDIAN
2260 (2 << 0) |
2261#endif
2262 (ib->gpu_addr & 0xFFFFFFFC));
2263 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2264 radeon_ring_write(ring, control);
2265}
2266
Alex Deucherfbc832c2012-07-20 14:41:35 -04002267/**
2268 * cik_ib_test - basic gfx ring IB test
2269 *
2270 * @rdev: radeon_device pointer
2271 * @ring: radeon_ring structure holding ring information
2272 *
2273 * Allocate an IB and execute it on the gfx ring (CIK).
2274 * Provides a basic gfx ring test to verify that IBs are working.
2275 * Returns 0 on success, error on failure.
2276 */
2277int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2278{
2279 struct radeon_ib ib;
2280 uint32_t scratch;
2281 uint32_t tmp = 0;
2282 unsigned i;
2283 int r;
2284
2285 r = radeon_scratch_get(rdev, &scratch);
2286 if (r) {
2287 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2288 return r;
2289 }
2290 WREG32(scratch, 0xCAFEDEAD);
2291 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2292 if (r) {
2293 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2294 return r;
2295 }
2296 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2297 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2298 ib.ptr[2] = 0xDEADBEEF;
2299 ib.length_dw = 3;
2300 r = radeon_ib_schedule(rdev, &ib, NULL);
2301 if (r) {
2302 radeon_scratch_free(rdev, scratch);
2303 radeon_ib_free(rdev, &ib);
2304 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2305 return r;
2306 }
2307 r = radeon_fence_wait(ib.fence, false);
2308 if (r) {
2309 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2310 return r;
2311 }
2312 for (i = 0; i < rdev->usec_timeout; i++) {
2313 tmp = RREG32(scratch);
2314 if (tmp == 0xDEADBEEF)
2315 break;
2316 DRM_UDELAY(1);
2317 }
2318 if (i < rdev->usec_timeout) {
2319 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2320 } else {
2321 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2322 scratch, tmp);
2323 r = -EINVAL;
2324 }
2325 radeon_scratch_free(rdev, scratch);
2326 radeon_ib_free(rdev, &ib);
2327 return r;
2328}
2329
Alex Deucher2cae3bc2012-07-05 11:45:40 -04002330/*
Alex Deucher841cf442012-12-18 21:47:44 -05002331 * CP.
 2332 * On CIK, gfx and compute now have independent command processors.
2333 *
2334 * GFX
2335 * Gfx consists of a single ring and can process both gfx jobs and
2336 * compute jobs. The gfx CP consists of three microengines (ME):
2337 * PFP - Pre-Fetch Parser
2338 * ME - Micro Engine
2339 * CE - Constant Engine
2340 * The PFP and ME make up what is considered the Drawing Engine (DE).
 2341 * The CE is an asynchronous engine used for updating buffer descriptors
2342 * used by the DE so that they can be loaded into cache in parallel
2343 * while the DE is processing state update packets.
2344 *
2345 * Compute
2346 * The compute CP consists of two microengines (ME):
2347 * MEC1 - Compute MicroEngine 1
2348 * MEC2 - Compute MicroEngine 2
2349 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2350 * The queues are exposed to userspace and are programmed directly
2351 * by the compute runtime.
2352 */
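/*
 * Illustrative helper (hypothetical, not used by the driver): with the
 * 4 pipes/MEC and 8 queues/pipe described above, a flat compute queue
 * index can be decomposed into its (me, pipe, queue) triple.
 */
static inline void cik_queue_index_to_mpq(u32 idx, u32 *me, u32 *pipe,
					  u32 *queue)
{
	*me = 1 + idx / (4 * 8);	/* MEC1 covers the first 32 queues */
	*pipe = (idx / 8) & 3;
	*queue = idx & 7;
}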
2353/**
2354 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2355 *
2356 * @rdev: radeon_device pointer
2357 * @enable: enable or disable the MEs
2358 *
2359 * Halts or unhalts the gfx MEs.
2360 */
2361static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2362{
2363 if (enable)
2364 WREG32(CP_ME_CNTL, 0);
2365 else {
2366 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2367 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2368 }
2369 udelay(50);
2370}
2371
2372/**
2373 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2374 *
2375 * @rdev: radeon_device pointer
2376 *
2377 * Loads the gfx PFP, ME, and CE ucode.
2378 * Returns 0 for success, -EINVAL if the ucode is not available.
2379 */
2380static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2381{
2382 const __be32 *fw_data;
2383 int i;
2384
2385 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2386 return -EINVAL;
2387
2388 cik_cp_gfx_enable(rdev, false);
2389
2390 /* PFP */
2391 fw_data = (const __be32 *)rdev->pfp_fw->data;
2392 WREG32(CP_PFP_UCODE_ADDR, 0);
2393 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2394 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2395 WREG32(CP_PFP_UCODE_ADDR, 0);
2396
2397 /* CE */
2398 fw_data = (const __be32 *)rdev->ce_fw->data;
2399 WREG32(CP_CE_UCODE_ADDR, 0);
2400 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2401 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2402 WREG32(CP_CE_UCODE_ADDR, 0);
2403
2404 /* ME */
2405 fw_data = (const __be32 *)rdev->me_fw->data;
2406 WREG32(CP_ME_RAM_WADDR, 0);
2407 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2408 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2409 WREG32(CP_ME_RAM_WADDR, 0);
2410
2411 WREG32(CP_PFP_UCODE_ADDR, 0);
2412 WREG32(CP_CE_UCODE_ADDR, 0);
2413 WREG32(CP_ME_RAM_WADDR, 0);
2414 WREG32(CP_ME_RAM_RADDR, 0);
2415 return 0;
2416}
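/*
 * Illustrative sketch of the upload pattern used above (hypothetical
 * helper, not part of the driver): reset the ucode write address, stream
 * the big-endian words, then reset the address again.
 */
static inline void cik_stream_ucode(struct radeon_device *rdev,
				    u32 addr_reg, u32 data_reg,
				    const __be32 *fw_data, u32 num_dw)
{
	u32 i;

	WREG32(addr_reg, 0);
	for (i = 0; i < num_dw; i++)
		WREG32(data_reg, be32_to_cpup(fw_data++));
	WREG32(addr_reg, 0);
}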
2417
2418/**
2419 * cik_cp_gfx_start - start the gfx ring
2420 *
2421 * @rdev: radeon_device pointer
2422 *
2423 * Enables the ring and loads the clear state context and other
2424 * packets required to init the ring.
2425 * Returns 0 for success, error for failure.
2426 */
2427static int cik_cp_gfx_start(struct radeon_device *rdev)
2428{
2429 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2430 int r, i;
2431
2432 /* init the CP */
2433 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2434 WREG32(CP_ENDIAN_SWAP, 0);
2435 WREG32(CP_DEVICE_ID, 1);
2436
2437 cik_cp_gfx_enable(rdev, true);
2438
2439 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2440 if (r) {
2441 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2442 return r;
2443 }
2444
2445 /* init the CE partitions. CE only used for gfx on CIK */
2446 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2447 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2448 radeon_ring_write(ring, 0xc000);
2449 radeon_ring_write(ring, 0xc000);
2450
2451 /* setup clear context state */
2452 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2453 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2454
2455 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2456 radeon_ring_write(ring, 0x80000000);
2457 radeon_ring_write(ring, 0x80000000);
2458
2459 for (i = 0; i < cik_default_size; i++)
2460 radeon_ring_write(ring, cik_default_state[i]);
2461
2462 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2463 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2464
2465 /* set clear context state */
2466 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2467 radeon_ring_write(ring, 0);
2468
2469 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2470 radeon_ring_write(ring, 0x00000316);
2471 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2472 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2473
2474 radeon_ring_unlock_commit(rdev, ring);
2475
2476 return 0;
2477}
2478
2479/**
2480 * cik_cp_gfx_fini - stop the gfx ring
2481 *
2482 * @rdev: radeon_device pointer
2483 *
2484 * Stop the gfx ring and tear down the driver ring
2485 * info.
2486 */
2487static void cik_cp_gfx_fini(struct radeon_device *rdev)
2488{
2489 cik_cp_gfx_enable(rdev, false);
2490 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2491}
2492
2493/**
2494 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2495 *
2496 * @rdev: radeon_device pointer
2497 *
2498 * Program the location and size of the gfx ring buffer
2499 * and test it to make sure it's working.
2500 * Returns 0 for success, error for failure.
2501 */
2502static int cik_cp_gfx_resume(struct radeon_device *rdev)
2503{
2504 struct radeon_ring *ring;
2505 u32 tmp;
2506 u32 rb_bufsz;
2507 u64 rb_addr;
2508 int r;
2509
2510 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2511 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2512
2513 /* Set the write pointer delay */
2514 WREG32(CP_RB_WPTR_DELAY, 0);
2515
2516 /* set the RB to use vmid 0 */
2517 WREG32(CP_RB_VMID, 0);
2518
2519 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2520
2521 /* ring 0 - compute and gfx */
2522 /* Set ring buffer size */
2523 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2524 rb_bufsz = drm_order(ring->ring_size / 8);
2525 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2526#ifdef __BIG_ENDIAN
2527 tmp |= BUF_SWAP_32BIT;
2528#endif
2529 WREG32(CP_RB0_CNTL, tmp);
2530
2531 /* Initialize the ring buffer's read and write pointers */
2532 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2533 ring->wptr = 0;
2534 WREG32(CP_RB0_WPTR, ring->wptr);
2535
 2536	/* set the wb address whether it's enabled or not */
2537 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2538 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2539
2540 /* scratch register shadowing is no longer supported */
2541 WREG32(SCRATCH_UMSK, 0);
2542
2543 if (!rdev->wb.enabled)
2544 tmp |= RB_NO_UPDATE;
2545
2546 mdelay(1);
2547 WREG32(CP_RB0_CNTL, tmp);
2548
2549 rb_addr = ring->gpu_addr >> 8;
2550 WREG32(CP_RB0_BASE, rb_addr);
2551 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2552
2553 ring->rptr = RREG32(CP_RB0_RPTR);
2554
2555 /* start the ring */
2556 cik_cp_gfx_start(rdev);
2557 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2558 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2559 if (r) {
2560 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2561 return r;
2562 }
2563 return 0;
2564}
2565
Alex Deucher963e81f2013-06-26 17:37:11 -04002566u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2567 struct radeon_ring *ring)
2568{
2569 u32 rptr;
2570
2571
2572
2573 if (rdev->wb.enabled) {
2574 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2575 } else {
Alex Deucherf61d5b462013-08-06 12:40:16 -04002576 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002577 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2578 rptr = RREG32(CP_HQD_PQ_RPTR);
2579 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04002580 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002581 }
2582 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2583
2584 return rptr;
2585}
2586
2587u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2588 struct radeon_ring *ring)
2589{
2590 u32 wptr;
2591
2592 if (rdev->wb.enabled) {
2593 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2594 } else {
Alex Deucherf61d5b462013-08-06 12:40:16 -04002595 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002596 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2597 wptr = RREG32(CP_HQD_PQ_WPTR);
2598 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04002599 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002600 }
2601 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2602
2603 return wptr;
2604}
2605
2606void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2607 struct radeon_ring *ring)
2608{
2609 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2610
2611 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2612 WDOORBELL32(ring->doorbell_offset, wptr);
2613}
2614
Alex Deucher841cf442012-12-18 21:47:44 -05002615/**
2616 * cik_cp_compute_enable - enable/disable the compute CP MEs
2617 *
2618 * @rdev: radeon_device pointer
2619 * @enable: enable or disable the MEs
2620 *
2621 * Halts or unhalts the compute MEs.
2622 */
2623static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2624{
2625 if (enable)
2626 WREG32(CP_MEC_CNTL, 0);
2627 else
2628 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2629 udelay(50);
2630}
2631
2632/**
2633 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2634 *
2635 * @rdev: radeon_device pointer
2636 *
2637 * Loads the compute MEC1&2 ucode.
2638 * Returns 0 for success, -EINVAL if the ucode is not available.
2639 */
2640static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2641{
2642 const __be32 *fw_data;
2643 int i;
2644
2645 if (!rdev->mec_fw)
2646 return -EINVAL;
2647
2648 cik_cp_compute_enable(rdev, false);
2649
2650 /* MEC1 */
2651 fw_data = (const __be32 *)rdev->mec_fw->data;
2652 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2653 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2654 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2655 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2656
2657 if (rdev->family == CHIP_KAVERI) {
2658 /* MEC2 */
2659 fw_data = (const __be32 *)rdev->mec_fw->data;
2660 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2661 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2662 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2663 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2664 }
2665
2666 return 0;
2667}
2668
2669/**
2670 * cik_cp_compute_start - start the compute queues
2671 *
2672 * @rdev: radeon_device pointer
2673 *
2674 * Enable the compute queues.
2675 * Returns 0 for success, error for failure.
2676 */
2677static int cik_cp_compute_start(struct radeon_device *rdev)
2678{
Alex Deucher963e81f2013-06-26 17:37:11 -04002679 cik_cp_compute_enable(rdev, true);
2680
Alex Deucher841cf442012-12-18 21:47:44 -05002681 return 0;
2682}
2683
2684/**
2685 * cik_cp_compute_fini - stop the compute queues
2686 *
2687 * @rdev: radeon_device pointer
2688 *
2689 * Stop the compute queues and tear down the driver queue
2690 * info.
2691 */
2692static void cik_cp_compute_fini(struct radeon_device *rdev)
2693{
Alex Deucher963e81f2013-06-26 17:37:11 -04002694 int i, idx, r;
2695
Alex Deucher841cf442012-12-18 21:47:44 -05002696 cik_cp_compute_enable(rdev, false);
Alex Deucher963e81f2013-06-26 17:37:11 -04002697
2698 for (i = 0; i < 2; i++) {
2699 if (i == 0)
2700 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2701 else
2702 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2703
2704 if (rdev->ring[idx].mqd_obj) {
2705 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2706 if (unlikely(r != 0))
2707 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2708
2709 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2710 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2711
2712 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2713 rdev->ring[idx].mqd_obj = NULL;
2714 }
2715 }
Alex Deucher841cf442012-12-18 21:47:44 -05002716}
2717
Alex Deucher963e81f2013-06-26 17:37:11 -04002718static void cik_mec_fini(struct radeon_device *rdev)
2719{
2720 int r;
2721
2722 if (rdev->mec.hpd_eop_obj) {
2723 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2724 if (unlikely(r != 0))
2725 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2726 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2727 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2728
2729 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2730 rdev->mec.hpd_eop_obj = NULL;
2731 }
2732}
2733
2734#define MEC_HPD_SIZE 2048
2735
2736static int cik_mec_init(struct radeon_device *rdev)
2737{
2738 int r;
2739 u32 *hpd;
2740
2741 /*
2742 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2743 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2744 */
2745 if (rdev->family == CHIP_KAVERI)
2746 rdev->mec.num_mec = 2;
2747 else
2748 rdev->mec.num_mec = 1;
2749 rdev->mec.num_pipe = 4;
2750 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2751
2752 if (rdev->mec.hpd_eop_obj == NULL) {
2753 r = radeon_bo_create(rdev,
2754 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2755 PAGE_SIZE, true,
2756 RADEON_GEM_DOMAIN_GTT, NULL,
2757 &rdev->mec.hpd_eop_obj);
2758 if (r) {
2759 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
2760 return r;
2761 }
2762 }
2763
2764 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2765 if (unlikely(r != 0)) {
2766 cik_mec_fini(rdev);
2767 return r;
2768 }
2769 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2770 &rdev->mec.hpd_eop_gpu_addr);
2771 if (r) {
2772 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
2773 cik_mec_fini(rdev);
2774 return r;
2775 }
2776 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2777 if (r) {
2778 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
2779 cik_mec_fini(rdev);
2780 return r;
2781 }
2782
2783 /* clear memory. Not sure if this is required or not */
2784 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2785
2786 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2787 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2788
2789 return 0;
2790}
2791
2792struct hqd_registers
2793{
2794 u32 cp_mqd_base_addr;
2795 u32 cp_mqd_base_addr_hi;
2796 u32 cp_hqd_active;
2797 u32 cp_hqd_vmid;
2798 u32 cp_hqd_persistent_state;
2799 u32 cp_hqd_pipe_priority;
2800 u32 cp_hqd_queue_priority;
2801 u32 cp_hqd_quantum;
2802 u32 cp_hqd_pq_base;
2803 u32 cp_hqd_pq_base_hi;
2804 u32 cp_hqd_pq_rptr;
2805 u32 cp_hqd_pq_rptr_report_addr;
2806 u32 cp_hqd_pq_rptr_report_addr_hi;
2807 u32 cp_hqd_pq_wptr_poll_addr;
2808 u32 cp_hqd_pq_wptr_poll_addr_hi;
2809 u32 cp_hqd_pq_doorbell_control;
2810 u32 cp_hqd_pq_wptr;
2811 u32 cp_hqd_pq_control;
2812 u32 cp_hqd_ib_base_addr;
2813 u32 cp_hqd_ib_base_addr_hi;
2814 u32 cp_hqd_ib_rptr;
2815 u32 cp_hqd_ib_control;
2816 u32 cp_hqd_iq_timer;
2817 u32 cp_hqd_iq_rptr;
2818 u32 cp_hqd_dequeue_request;
2819 u32 cp_hqd_dma_offload;
2820 u32 cp_hqd_sema_cmd;
2821 u32 cp_hqd_msg_type;
2822 u32 cp_hqd_atomic0_preop_lo;
2823 u32 cp_hqd_atomic0_preop_hi;
2824 u32 cp_hqd_atomic1_preop_lo;
2825 u32 cp_hqd_atomic1_preop_hi;
2826 u32 cp_hqd_hq_scheduler0;
2827 u32 cp_hqd_hq_scheduler1;
2828 u32 cp_mqd_control;
2829};
2830
2831struct bonaire_mqd
2832{
2833 u32 header;
2834 u32 dispatch_initiator;
2835 u32 dimensions[3];
2836 u32 start_idx[3];
2837 u32 num_threads[3];
2838 u32 pipeline_stat_enable;
2839 u32 perf_counter_enable;
2840 u32 pgm[2];
2841 u32 tba[2];
2842 u32 tma[2];
2843 u32 pgm_rsrc[2];
2844 u32 vmid;
2845 u32 resource_limits;
2846 u32 static_thread_mgmt01[2];
2847 u32 tmp_ring_size;
2848 u32 static_thread_mgmt23[2];
2849 u32 restart[3];
2850 u32 thread_trace_enable;
2851 u32 reserved1;
2852 u32 user_data[16];
2853 u32 vgtcs_invoke_count[2];
2854 struct hqd_registers queue_state;
2855 u32 dequeue_cntr;
2856 u32 interrupt_queue[64];
2857};
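/*
 * Note: the MQD above is only a few hundred bytes (roughly 150 dwords),
 * so the single page-sized BO allocated per compute ring in
 * cik_cp_compute_resume() below comfortably holds one MQD.
 */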
2858
Alex Deucher841cf442012-12-18 21:47:44 -05002859/**
2860 * cik_cp_compute_resume - setup the compute queue registers
2861 *
2862 * @rdev: radeon_device pointer
2863 *
2864 * Program the compute queues and test them to make sure they
2865 * are working.
2866 * Returns 0 for success, error for failure.
2867 */
2868static int cik_cp_compute_resume(struct radeon_device *rdev)
2869{
Alex Deucher963e81f2013-06-26 17:37:11 -04002870 int r, i, idx;
2871 u32 tmp;
2872 bool use_doorbell = true;
2873 u64 hqd_gpu_addr;
2874 u64 mqd_gpu_addr;
2875 u64 eop_gpu_addr;
2876 u64 wb_gpu_addr;
2877 u32 *buf;
2878 struct bonaire_mqd *mqd;
Alex Deucher841cf442012-12-18 21:47:44 -05002879
Alex Deucher841cf442012-12-18 21:47:44 -05002880 r = cik_cp_compute_start(rdev);
2881 if (r)
2882 return r;
Alex Deucher963e81f2013-06-26 17:37:11 -04002883
2884 /* fix up chicken bits */
2885 tmp = RREG32(CP_CPF_DEBUG);
2886 tmp |= (1 << 23);
2887 WREG32(CP_CPF_DEBUG, tmp);
2888
2889 /* init the pipes */
Alex Deucherf61d5b462013-08-06 12:40:16 -04002890 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002891 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2892 int me = (i < 4) ? 1 : 2;
2893 int pipe = (i < 4) ? i : (i - 4);
2894
2895 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2896
2897 cik_srbm_select(rdev, me, pipe, 0, 0);
2898
2899 /* write the EOP addr */
2900 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2901 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2902
2903 /* set the VMID assigned */
2904 WREG32(CP_HPD_EOP_VMID, 0);
2905
2906 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2907 tmp = RREG32(CP_HPD_EOP_CONTROL);
2908 tmp &= ~EOP_SIZE_MASK;
2909 tmp |= drm_order(MEC_HPD_SIZE / 8);
2910 WREG32(CP_HPD_EOP_CONTROL, tmp);
2911 }
2912 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04002913 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002914
2915 /* init the queues. Just two for now. */
2916 for (i = 0; i < 2; i++) {
2917 if (i == 0)
2918 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2919 else
2920 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2921
2922 if (rdev->ring[idx].mqd_obj == NULL) {
2923 r = radeon_bo_create(rdev,
2924 sizeof(struct bonaire_mqd),
2925 PAGE_SIZE, true,
2926 RADEON_GEM_DOMAIN_GTT, NULL,
2927 &rdev->ring[idx].mqd_obj);
2928 if (r) {
2929 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2930 return r;
2931 }
2932 }
2933
2934 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2935 if (unlikely(r != 0)) {
2936 cik_cp_compute_fini(rdev);
2937 return r;
2938 }
2939 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2940 &mqd_gpu_addr);
2941 if (r) {
2942 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2943 cik_cp_compute_fini(rdev);
2944 return r;
2945 }
2946 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2947 if (r) {
2948 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2949 cik_cp_compute_fini(rdev);
2950 return r;
2951 }
2952
2953 /* doorbell offset */
2954 rdev->ring[idx].doorbell_offset =
2955 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2956
2957 /* init the mqd struct */
2958 memset(buf, 0, sizeof(struct bonaire_mqd));
2959
2960 mqd = (struct bonaire_mqd *)buf;
2961 mqd->header = 0xC0310800;
2962 mqd->static_thread_mgmt01[0] = 0xffffffff;
2963 mqd->static_thread_mgmt01[1] = 0xffffffff;
2964 mqd->static_thread_mgmt23[0] = 0xffffffff;
2965 mqd->static_thread_mgmt23[1] = 0xffffffff;
2966
Alex Deucherf61d5b462013-08-06 12:40:16 -04002967 mutex_lock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04002968 cik_srbm_select(rdev, rdev->ring[idx].me,
2969 rdev->ring[idx].pipe,
2970 rdev->ring[idx].queue, 0);
2971
2972 /* disable wptr polling */
2973 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2974 tmp &= ~WPTR_POLL_EN;
2975 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2976
2977 /* enable doorbell? */
2978 mqd->queue_state.cp_hqd_pq_doorbell_control =
2979 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2980 if (use_doorbell)
2981 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2982 else
2983 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2984 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2985 mqd->queue_state.cp_hqd_pq_doorbell_control);
2986
2987 /* disable the queue if it's active */
2988 mqd->queue_state.cp_hqd_dequeue_request = 0;
2989 mqd->queue_state.cp_hqd_pq_rptr = 0;
 2990		mqd->queue_state.cp_hqd_pq_wptr = 0;
2991 if (RREG32(CP_HQD_ACTIVE) & 1) {
2992 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
2993 for (i = 0; i < rdev->usec_timeout; i++) {
2994 if (!(RREG32(CP_HQD_ACTIVE) & 1))
2995 break;
2996 udelay(1);
2997 }
2998 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
2999 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3000 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3001 }
3002
3003 /* set the pointer to the MQD */
3004 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3005 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3006 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3007 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3008 /* set MQD vmid to 0 */
3009 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3010 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3011 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3012
 3013		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3014 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3015 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3016 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3017 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3018 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3019
3020 /* set up the HQD, this is similar to CP_RB0_CNTL */
3021 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3022 mqd->queue_state.cp_hqd_pq_control &=
3023 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3024
3025 mqd->queue_state.cp_hqd_pq_control |=
3026 drm_order(rdev->ring[idx].ring_size / 8);
3027 mqd->queue_state.cp_hqd_pq_control |=
3028 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3029#ifdef __BIG_ENDIAN
3030 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3031#endif
3032 mqd->queue_state.cp_hqd_pq_control &=
3033 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3034 mqd->queue_state.cp_hqd_pq_control |=
3035 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3036 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3037
3038 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3039 if (i == 0)
3040 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3041 else
3042 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3043 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3044 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3045 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3046 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3047 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3048
 3049		/* set the wb address whether it's enabled or not */
3050 if (i == 0)
3051 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3052 else
3053 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3054 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3055 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3056 upper_32_bits(wb_gpu_addr) & 0xffff;
3057 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3058 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3059 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3060 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3061
3062 /* enable the doorbell if requested */
3063 if (use_doorbell) {
3064 mqd->queue_state.cp_hqd_pq_doorbell_control =
3065 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3066 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3067 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3068 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3069 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3070 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3071 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3072
3073 } else {
3074 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3075 }
3076 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3077 mqd->queue_state.cp_hqd_pq_doorbell_control);
3078
3079 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3080 rdev->ring[idx].wptr = 0;
3081 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3082 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3083 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3084 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3085
3086 /* set the vmid for the queue */
3087 mqd->queue_state.cp_hqd_vmid = 0;
3088 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3089
3090 /* activate the queue */
3091 mqd->queue_state.cp_hqd_active = 1;
3092 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3093
3094 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04003095 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher963e81f2013-06-26 17:37:11 -04003096
3097 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3098 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3099
3100 rdev->ring[idx].ready = true;
3101 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3102 if (r)
3103 rdev->ring[idx].ready = false;
3104 }
3105
Alex Deucher841cf442012-12-18 21:47:44 -05003106 return 0;
3107}
3108
Alex Deucher841cf442012-12-18 21:47:44 -05003109static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3110{
3111 cik_cp_gfx_enable(rdev, enable);
3112 cik_cp_compute_enable(rdev, enable);
3113}
3114
Alex Deucher841cf442012-12-18 21:47:44 -05003115static int cik_cp_load_microcode(struct radeon_device *rdev)
3116{
3117 int r;
3118
3119 r = cik_cp_gfx_load_microcode(rdev);
3120 if (r)
3121 return r;
3122 r = cik_cp_compute_load_microcode(rdev);
3123 if (r)
3124 return r;
3125
3126 return 0;
3127}
3128
Alex Deucher841cf442012-12-18 21:47:44 -05003129static void cik_cp_fini(struct radeon_device *rdev)
3130{
3131 cik_cp_gfx_fini(rdev);
3132 cik_cp_compute_fini(rdev);
3133}
3134
Alex Deucher841cf442012-12-18 21:47:44 -05003135static int cik_cp_resume(struct radeon_device *rdev)
3136{
3137 int r;
3138
3139 /* Reset all cp blocks */
3140 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3141 RREG32(GRBM_SOFT_RESET);
3142 mdelay(15);
3143 WREG32(GRBM_SOFT_RESET, 0);
3144 RREG32(GRBM_SOFT_RESET);
3145
3146 r = cik_cp_load_microcode(rdev);
3147 if (r)
3148 return r;
3149
3150 r = cik_cp_gfx_resume(rdev);
3151 if (r)
3152 return r;
3153 r = cik_cp_compute_resume(rdev);
3154 if (r)
3155 return r;
3156
3157 return 0;
3158}
3159
Alex Deucher21a93e12013-04-09 12:47:11 -04003160/*
3161 * sDMA - System DMA
3162 * Starting with CIK, the GPU has new asynchronous
3163 * DMA engines. These engines are used for compute
3164 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3165 * and each one supports 1 ring buffer used for gfx
3166 * and 2 queues used for compute.
3167 *
3168 * The programming model is very similar to the CP
 3169 * (ring buffer, IBs, etc.), but sDMA has its own
3170 * packet format that is different from the PM4 format
3171 * used by the CP. sDMA supports copying data, writing
3172 * embedded data, solid fills, and a number of other
3173 * things. It also has support for tiling/detiling of
3174 * buffers.
3175 */
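/*
 * Illustrative note (see cikd.h for the authoritative definition): sDMA
 * packets are built with SDMA_PACKET(op, sub_op, extra), which packs an
 * 8-bit opcode, an 8-bit sub-opcode and 16 bits of opcode-specific extra
 * data into a single header dword, e.g.
 *
 *	SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *
 * as used for the rptr write-back in cik_sdma_ring_ib_execute() below.
 */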
3176/**
3177 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3178 *
3179 * @rdev: radeon_device pointer
3180 * @ib: IB object to schedule
3181 *
3182 * Schedule an IB in the DMA ring (CIK).
3183 */
3184void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3185 struct radeon_ib *ib)
3186{
3187 struct radeon_ring *ring = &rdev->ring[ib->ring];
3188 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3189
3190 if (rdev->wb.enabled) {
3191 u32 next_rptr = ring->wptr + 5;
3192 while ((next_rptr & 7) != 4)
3193 next_rptr++;
3194 next_rptr += 4;
3195 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3196 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3197 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3198 radeon_ring_write(ring, 1); /* number of DWs to follow */
3199 radeon_ring_write(ring, next_rptr);
3200 }
3201
 3202	/* IB packet must end on an 8 DW boundary */
3203 while ((ring->wptr & 7) != 4)
3204 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3205 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3206 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3207 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3208 radeon_ring_write(ring, ib->length_dw);
3209
3210}
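/*
 * Note on the padding above: the INDIRECT_BUFFER packet is 4 dwords long,
 * so NOPs are emitted until (wptr & 7) == 4 and the packet then ends
 * exactly on the required 8-dword boundary.  The optional 5-dword rptr
 * write-back is accounted for the same way when computing next_rptr.
 */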
3211
3212/**
3213 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3214 *
3215 * @rdev: radeon_device pointer
3216 * @fence: radeon fence object
3217 *
3218 * Add a DMA fence packet to the ring to write
3219 * the fence seq number and DMA trap packet to generate
3220 * an interrupt if needed (CIK).
3221 */
3222void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3223 struct radeon_fence *fence)
3224{
3225 struct radeon_ring *ring = &rdev->ring[fence->ring];
3226 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3227 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3228 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3229 u32 ref_and_mask;
3230
3231 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3232 ref_and_mask = SDMA0;
3233 else
3234 ref_and_mask = SDMA1;
3235
3236 /* write the fence */
3237 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3238 radeon_ring_write(ring, addr & 0xffffffff);
3239 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3240 radeon_ring_write(ring, fence->seq);
3241 /* generate an interrupt */
3242 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3243 /* flush HDP */
3244 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3245 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3246 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3247 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3248 radeon_ring_write(ring, ref_and_mask); /* MASK */
3249 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3250}
3251
3252/**
3253 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3254 *
3255 * @rdev: radeon_device pointer
3256 * @ring: radeon_ring structure holding ring information
3257 * @semaphore: radeon semaphore object
3258 * @emit_wait: wait or signal semaphore
3259 *
 3260 * Add a DMA semaphore packet to the ring to wait on or signal
3261 * other rings (CIK).
3262 */
3263void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3264 struct radeon_ring *ring,
3265 struct radeon_semaphore *semaphore,
3266 bool emit_wait)
3267{
3268 u64 addr = semaphore->gpu_addr;
3269 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3270
3271 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3272 radeon_ring_write(ring, addr & 0xfffffff8);
3273 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3274}
3275
3276/**
3277 * cik_sdma_gfx_stop - stop the gfx async dma engines
3278 *
3279 * @rdev: radeon_device pointer
3280 *
3281 * Stop the gfx async dma ring buffers (CIK).
3282 */
3283static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3284{
3285 u32 rb_cntl, reg_offset;
3286 int i;
3287
3288 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3289
3290 for (i = 0; i < 2; i++) {
3291 if (i == 0)
3292 reg_offset = SDMA0_REGISTER_OFFSET;
3293 else
3294 reg_offset = SDMA1_REGISTER_OFFSET;
3295 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3296 rb_cntl &= ~SDMA_RB_ENABLE;
3297 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3298 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3299 }
3300}
3301
3302/**
3303 * cik_sdma_rlc_stop - stop the compute async dma engines
3304 *
3305 * @rdev: radeon_device pointer
3306 *
3307 * Stop the compute async dma queues (CIK).
3308 */
3309static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3310{
3311 /* XXX todo */
3312}
3313
3314/**
3315 * cik_sdma_enable - enable/disable the async dma engines
3316 *
3317 * @rdev: radeon_device pointer
3318 * @enable: enable/disable the DMA MEs.
3319 *
3320 * Halt or unhalt the async dma engines (CIK).
3321 */
3322static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3323{
3324 u32 me_cntl, reg_offset;
3325 int i;
3326
3327 for (i = 0; i < 2; i++) {
3328 if (i == 0)
3329 reg_offset = SDMA0_REGISTER_OFFSET;
3330 else
3331 reg_offset = SDMA1_REGISTER_OFFSET;
3332 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3333 if (enable)
3334 me_cntl &= ~SDMA_HALT;
3335 else
3336 me_cntl |= SDMA_HALT;
3337 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3338 }
3339}
3340
3341/**
3342 * cik_sdma_gfx_resume - setup and start the async dma engines
3343 *
3344 * @rdev: radeon_device pointer
3345 *
3346 * Set up the gfx DMA ring buffers and enable them (CIK).
3347 * Returns 0 for success, error for failure.
3348 */
3349static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3350{
3351 struct radeon_ring *ring;
3352 u32 rb_cntl, ib_cntl;
3353 u32 rb_bufsz;
3354 u32 reg_offset, wb_offset;
3355 int i, r;
3356
3357 for (i = 0; i < 2; i++) {
3358 if (i == 0) {
3359 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3360 reg_offset = SDMA0_REGISTER_OFFSET;
3361 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3362 } else {
3363 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3364 reg_offset = SDMA1_REGISTER_OFFSET;
3365 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3366 }
3367
3368 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3369 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3370
3371 /* Set ring buffer size in dwords */
3372 rb_bufsz = drm_order(ring->ring_size / 4);
3373 rb_cntl = rb_bufsz << 1;
3374#ifdef __BIG_ENDIAN
3375 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3376#endif
3377 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3378
3379 /* Initialize the ring buffer's read and write pointers */
3380 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3381 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3382
3383 /* set the wb address whether it's enabled or not */
3384 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3385 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3386 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3387 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3388
3389 if (rdev->wb.enabled)
3390 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3391
3392 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3393 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3394
3395 ring->wptr = 0;
3396 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3397
3398 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3399
3400 /* enable DMA RB */
3401 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3402
3403 ib_cntl = SDMA_IB_ENABLE;
3404#ifdef __BIG_ENDIAN
3405 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3406#endif
3407 /* enable DMA IBs */
3408 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3409
3410 ring->ready = true;
3411
3412 r = radeon_ring_test(rdev, ring->idx, ring);
3413 if (r) {
3414 ring->ready = false;
3415 return r;
3416 }
3417 }
3418
3419 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3420
3421 return 0;
3422}
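
/* Worked example (illustrative only): the ring size is programmed as a
 * log2 dword count shifted left by one, as in the code above.  Assuming
 * a hypothetical 256KB ring buffer:
 *
 *	ring->ring_size = 256 * 1024;			// bytes
 *	rb_bufsz = drm_order(ring->ring_size / 4);	// 65536 dwords -> 16
 *	rb_cntl  = rb_bufsz << 1;			// 0x20, before enable/swap bits
 */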
3423
3424/**
3425 * cik_sdma_rlc_resume - setup and start the async dma engines
3426 *
3427 * @rdev: radeon_device pointer
3428 *
3429 * Set up the compute DMA queues and enable them (CIK).
3430 * Returns 0 for success, error for failure.
3431 */
3432static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3433{
3434 /* XXX todo */
3435 return 0;
3436}
3437
3438/**
3439 * cik_sdma_load_microcode - load the sDMA ME ucode
3440 *
3441 * @rdev: radeon_device pointer
3442 *
3443 * Loads the sDMA0/1 ucode.
3444 * Returns 0 for success, -EINVAL if the ucode is not available.
3445 */
3446static int cik_sdma_load_microcode(struct radeon_device *rdev)
3447{
3448 const __be32 *fw_data;
3449 int i;
3450
3451 if (!rdev->sdma_fw)
3452 return -EINVAL;
3453
3454 /* stop the gfx rings and rlc compute queues */
3455 cik_sdma_gfx_stop(rdev);
3456 cik_sdma_rlc_stop(rdev);
3457
3458 /* halt the MEs */
3459 cik_sdma_enable(rdev, false);
3460
3461 /* sdma0 */
3462 fw_data = (const __be32 *)rdev->sdma_fw->data;
3463 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3464 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3465 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3466 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3467
3468 /* sdma1 */
3469 fw_data = (const __be32 *)rdev->sdma_fw->data;
3470 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3471 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3472 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3473 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3474
3475 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3476 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3477 return 0;
3478}
3479
3480/**
3481 * cik_sdma_resume - setup and start the async dma engines
3482 *
3483 * @rdev: radeon_device pointer
3484 *
3485 * Set up the DMA engines and enable them (CIK).
3486 * Returns 0 for success, error for failure.
3487 */
3488static int cik_sdma_resume(struct radeon_device *rdev)
3489{
3490 int r;
3491
3492 /* Reset dma */
3493 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3494 RREG32(SRBM_SOFT_RESET);
3495 udelay(50);
3496 WREG32(SRBM_SOFT_RESET, 0);
3497 RREG32(SRBM_SOFT_RESET);
3498
3499 r = cik_sdma_load_microcode(rdev);
3500 if (r)
3501 return r;
3502
3503 /* unhalt the MEs */
3504 cik_sdma_enable(rdev, true);
3505
3506 /* start the gfx rings and rlc compute queues */
3507 r = cik_sdma_gfx_resume(rdev);
3508 if (r)
3509 return r;
3510 r = cik_sdma_rlc_resume(rdev);
3511 if (r)
3512 return r;
3513
3514 return 0;
3515}
3516
3517/**
3518 * cik_sdma_fini - tear down the async dma engines
3519 *
3520 * @rdev: radeon_device pointer
3521 *
3522 * Stop the async dma engines and free the rings (CIK).
3523 */
3524static void cik_sdma_fini(struct radeon_device *rdev)
3525{
3526 /* stop the gfx rings and rlc compute queues */
3527 cik_sdma_gfx_stop(rdev);
3528 cik_sdma_rlc_stop(rdev);
3529 /* halt the MEs */
3530 cik_sdma_enable(rdev, false);
3531 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3532 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3533 /* XXX - compute dma queue tear down */
3534}
3535
3536/**
3537 * cik_copy_dma - copy pages using the DMA engine
3538 *
3539 * @rdev: radeon_device pointer
3540 * @src_offset: src GPU address
3541 * @dst_offset: dst GPU address
3542 * @num_gpu_pages: number of GPU pages to xfer
3543 * @fence: radeon fence object
3544 *
3545 * Copy GPU paging using the DMA engine (CIK).
3546 * Used by the radeon ttm implementation to move pages if
3547 * registered as the asic copy callback.
3548 */
3549int cik_copy_dma(struct radeon_device *rdev,
3550 uint64_t src_offset, uint64_t dst_offset,
3551 unsigned num_gpu_pages,
3552 struct radeon_fence **fence)
3553{
3554 struct radeon_semaphore *sem = NULL;
3555 int ring_index = rdev->asic->copy.dma_ring_index;
3556 struct radeon_ring *ring = &rdev->ring[ring_index];
3557 u32 size_in_bytes, cur_size_in_bytes;
3558 int i, num_loops;
3559 int r = 0;
3560
3561 r = radeon_semaphore_create(rdev, &sem);
3562 if (r) {
3563 DRM_ERROR("radeon: moving bo (%d).\n", r);
3564 return r;
3565 }
3566
3567 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3568 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3569 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3570 if (r) {
3571 DRM_ERROR("radeon: moving bo (%d).\n", r);
3572 radeon_semaphore_free(rdev, &sem, NULL);
3573 return r;
3574 }
3575
3576 if (radeon_fence_need_sync(*fence, ring->idx)) {
3577 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3578 ring->idx);
3579 radeon_fence_note_sync(*fence, ring->idx);
3580 } else {
3581 radeon_semaphore_free(rdev, &sem, NULL);
3582 }
3583
3584 for (i = 0; i < num_loops; i++) {
3585 cur_size_in_bytes = size_in_bytes;
3586 if (cur_size_in_bytes > 0x1fffff)
3587 cur_size_in_bytes = 0x1fffff;
3588 size_in_bytes -= cur_size_in_bytes;
3589 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3590 radeon_ring_write(ring, cur_size_in_bytes);
3591 radeon_ring_write(ring, 0); /* src/dst endian swap */
3592 radeon_ring_write(ring, src_offset & 0xffffffff);
3593 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3594 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3595 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3596 src_offset += cur_size_in_bytes;
3597 dst_offset += cur_size_in_bytes;
3598 }
3599
3600 r = radeon_fence_emit(rdev, fence, ring->idx);
3601 if (r) {
3602 radeon_ring_unlock_undo(rdev, ring);
3603 return r;
3604 }
3605
3606 radeon_ring_unlock_commit(rdev, ring);
3607 radeon_semaphore_free(rdev, &sem, *fence);
3608
3609 return r;
3610}
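
/* Worked example (illustrative only): each COPY_LINEAR packet moves at
 * most 0x1fffff bytes and costs 7 dwords, and up to 14 extra dwords are
 * reserved for the semaphore sync and fence.  Assuming a hypothetical
 * 1024-page (4MB) copy:
 *
 *	size_in_bytes = 1024 << RADEON_GPU_PAGE_SHIFT;	// 0x400000
 *	num_loops = DIV_ROUND_UP(0x400000, 0x1fffff);	// 3 packets
 *	ring space = 3 * 7 + 14 = 35 dwords
 */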
3611
3612/**
3613 * cik_sdma_ring_test - simple async dma engine test
3614 *
3615 * @rdev: radeon_device pointer
3616 * @ring: radeon_ring structure holding ring information
3617 *
3618 * Test the DMA engine by using it to write a value
3619 * to memory (CIK).
3620 * Returns 0 for success, error for failure.
3621 */
3622int cik_sdma_ring_test(struct radeon_device *rdev,
3623 struct radeon_ring *ring)
3624{
3625 unsigned i;
3626 int r;
3627 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3628 u32 tmp;
3629
3630 if (!ptr) {
3631 DRM_ERROR("invalid vram scratch pointer\n");
3632 return -EINVAL;
3633 }
3634
3635 tmp = 0xCAFEDEAD;
3636 writel(tmp, ptr);
3637
3638 r = radeon_ring_lock(rdev, ring, 4);
3639 if (r) {
3640 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3641 return r;
3642 }
3643 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3644 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3645 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3646 radeon_ring_write(ring, 1); /* number of DWs to follow */
3647 radeon_ring_write(ring, 0xDEADBEEF);
3648 radeon_ring_unlock_commit(rdev, ring);
3649
3650 for (i = 0; i < rdev->usec_timeout; i++) {
3651 tmp = readl(ptr);
3652 if (tmp == 0xDEADBEEF)
3653 break;
3654 DRM_UDELAY(1);
3655 }
3656
3657 if (i < rdev->usec_timeout) {
3658 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3659 } else {
3660 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3661 ring->idx, tmp);
3662 r = -EINVAL;
3663 }
3664 return r;
3665}
3666
3667/**
3668 * cik_sdma_ib_test - test an IB on the DMA engine
3669 *
3670 * @rdev: radeon_device pointer
3671 * @ring: radeon_ring structure holding ring information
3672 *
3673 * Test a simple IB in the DMA ring (CIK).
3674 * Returns 0 on success, error on failure.
3675 */
3676int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3677{
3678 struct radeon_ib ib;
3679 unsigned i;
3680 int r;
3681 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3682 u32 tmp = 0;
3683
3684 if (!ptr) {
3685 DRM_ERROR("invalid vram scratch pointer\n");
3686 return -EINVAL;
3687 }
3688
3689 tmp = 0xCAFEDEAD;
3690 writel(tmp, ptr);
3691
3692 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3693 if (r) {
3694 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3695 return r;
3696 }
3697
3698 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3699 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3700 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3701 ib.ptr[3] = 1;
3702 ib.ptr[4] = 0xDEADBEEF;
3703 ib.length_dw = 5;
3704
3705 r = radeon_ib_schedule(rdev, &ib, NULL);
3706 if (r) {
3707 radeon_ib_free(rdev, &ib);
3708 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3709 return r;
3710 }
3711 r = radeon_fence_wait(ib.fence, false);
3712 if (r) {
3713 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3714 return r;
3715 }
3716 for (i = 0; i < rdev->usec_timeout; i++) {
3717 tmp = readl(ptr);
3718 if (tmp == 0xDEADBEEF)
3719 break;
3720 DRM_UDELAY(1);
3721 }
3722 if (i < rdev->usec_timeout) {
3723 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3724 } else {
3725 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3726 r = -EINVAL;
3727 }
3728 radeon_ib_free(rdev, &ib);
3729 return r;
3730}
3731
Alex Deuchercc066712013-04-09 12:59:51 -04003732
3733static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3734{
3735 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3736 RREG32(GRBM_STATUS));
3737 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3738 RREG32(GRBM_STATUS2));
3739 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3740 RREG32(GRBM_STATUS_SE0));
3741 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3742 RREG32(GRBM_STATUS_SE1));
3743 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3744 RREG32(GRBM_STATUS_SE2));
3745 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3746 RREG32(GRBM_STATUS_SE3));
3747 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3748 RREG32(SRBM_STATUS));
3749 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3750 RREG32(SRBM_STATUS2));
3751 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3752 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3753 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3754 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
Alex Deucher963e81f2013-06-26 17:37:11 -04003755 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3756 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3757 RREG32(CP_STALLED_STAT1));
3758 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3759 RREG32(CP_STALLED_STAT2));
3760 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3761 RREG32(CP_STALLED_STAT3));
3762 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3763 RREG32(CP_CPF_BUSY_STAT));
3764 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3765 RREG32(CP_CPF_STALLED_STAT1));
3766 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3767 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3768 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3769 RREG32(CP_CPC_STALLED_STAT1));
3770 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
Alex Deuchercc066712013-04-09 12:59:51 -04003771}
3772
Alex Deucher6f2043c2013-04-09 12:43:41 -04003773/**
Alex Deuchercc066712013-04-09 12:59:51 -04003774 * cik_gpu_check_soft_reset - check which blocks are busy
3775 *
3776 * @rdev: radeon_device pointer
3777 *
3778 * Check which blocks are busy and return the relevant reset
3779 * mask to be used by cik_gpu_soft_reset().
3780 * Returns a mask of the blocks to be reset.
3781 */
3782static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3783{
3784 u32 reset_mask = 0;
3785 u32 tmp;
3786
3787 /* GRBM_STATUS */
3788 tmp = RREG32(GRBM_STATUS);
3789 if (tmp & (PA_BUSY | SC_BUSY |
3790 BCI_BUSY | SX_BUSY |
3791 TA_BUSY | VGT_BUSY |
3792 DB_BUSY | CB_BUSY |
3793 GDS_BUSY | SPI_BUSY |
3794 IA_BUSY | IA_BUSY_NO_DMA))
3795 reset_mask |= RADEON_RESET_GFX;
3796
3797 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3798 reset_mask |= RADEON_RESET_CP;
3799
3800 /* GRBM_STATUS2 */
3801 tmp = RREG32(GRBM_STATUS2);
3802 if (tmp & RLC_BUSY)
3803 reset_mask |= RADEON_RESET_RLC;
3804
3805 /* SDMA0_STATUS_REG */
3806 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3807 if (!(tmp & SDMA_IDLE))
3808 reset_mask |= RADEON_RESET_DMA;
3809
3810 /* SDMA1_STATUS_REG */
3811 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3812 if (!(tmp & SDMA_IDLE))
3813 reset_mask |= RADEON_RESET_DMA1;
3814
3815 /* SRBM_STATUS2 */
3816 tmp = RREG32(SRBM_STATUS2);
3817 if (tmp & SDMA_BUSY)
3818 reset_mask |= RADEON_RESET_DMA;
3819
3820 if (tmp & SDMA1_BUSY)
3821 reset_mask |= RADEON_RESET_DMA1;
3822
3823 /* SRBM_STATUS */
3824 tmp = RREG32(SRBM_STATUS);
3825
3826 if (tmp & IH_BUSY)
3827 reset_mask |= RADEON_RESET_IH;
3828
3829 if (tmp & SEM_BUSY)
3830 reset_mask |= RADEON_RESET_SEM;
3831
3832 if (tmp & GRBM_RQ_PENDING)
3833 reset_mask |= RADEON_RESET_GRBM;
3834
3835 if (tmp & VMC_BUSY)
3836 reset_mask |= RADEON_RESET_VMC;
3837
3838 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3839 MCC_BUSY | MCD_BUSY))
3840 reset_mask |= RADEON_RESET_MC;
3841
3842 if (evergreen_is_display_hung(rdev))
3843 reset_mask |= RADEON_RESET_DISPLAY;
3844
3845	/* Skip MC reset as it's most likely not hung, just busy */
3846 if (reset_mask & RADEON_RESET_MC) {
3847 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3848 reset_mask &= ~RADEON_RESET_MC;
3849 }
3850
3851 return reset_mask;
3852}
3853
3854/**
3855 * cik_gpu_soft_reset - soft reset GPU
3856 *
3857 * @rdev: radeon_device pointer
3858 * @reset_mask: mask of which blocks to reset
3859 *
3860 * Soft reset the blocks specified in @reset_mask.
3861 */
3862static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3863{
3864 struct evergreen_mc_save save;
3865 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3866 u32 tmp;
3867
3868 if (reset_mask == 0)
3869 return;
3870
3871 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3872
3873 cik_print_gpu_status_regs(rdev);
3874 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3875 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3876 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3877 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3878
3879 /* stop the rlc */
3880 cik_rlc_stop(rdev);
3881
3882 /* Disable GFX parsing/prefetching */
3883 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3884
3885 /* Disable MEC parsing/prefetching */
3886 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3887
3888 if (reset_mask & RADEON_RESET_DMA) {
3889 /* sdma0 */
3890 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3891 tmp |= SDMA_HALT;
3892 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3893 }
3894 if (reset_mask & RADEON_RESET_DMA1) {
3895 /* sdma1 */
3896 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3897 tmp |= SDMA_HALT;
3898 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3899 }
3900
3901 evergreen_mc_stop(rdev, &save);
3902 if (evergreen_mc_wait_for_idle(rdev)) {
3903		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3904 }
3905
3906 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3907 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3908
3909 if (reset_mask & RADEON_RESET_CP) {
3910 grbm_soft_reset |= SOFT_RESET_CP;
3911
3912 srbm_soft_reset |= SOFT_RESET_GRBM;
3913 }
3914
3915 if (reset_mask & RADEON_RESET_DMA)
3916 srbm_soft_reset |= SOFT_RESET_SDMA;
3917
3918 if (reset_mask & RADEON_RESET_DMA1)
3919 srbm_soft_reset |= SOFT_RESET_SDMA1;
3920
3921 if (reset_mask & RADEON_RESET_DISPLAY)
3922 srbm_soft_reset |= SOFT_RESET_DC;
3923
3924 if (reset_mask & RADEON_RESET_RLC)
3925 grbm_soft_reset |= SOFT_RESET_RLC;
3926
3927 if (reset_mask & RADEON_RESET_SEM)
3928 srbm_soft_reset |= SOFT_RESET_SEM;
3929
3930 if (reset_mask & RADEON_RESET_IH)
3931 srbm_soft_reset |= SOFT_RESET_IH;
3932
3933 if (reset_mask & RADEON_RESET_GRBM)
3934 srbm_soft_reset |= SOFT_RESET_GRBM;
3935
3936 if (reset_mask & RADEON_RESET_VMC)
3937 srbm_soft_reset |= SOFT_RESET_VMC;
3938
3939 if (!(rdev->flags & RADEON_IS_IGP)) {
3940 if (reset_mask & RADEON_RESET_MC)
3941 srbm_soft_reset |= SOFT_RESET_MC;
3942 }
3943
3944 if (grbm_soft_reset) {
3945 tmp = RREG32(GRBM_SOFT_RESET);
3946 tmp |= grbm_soft_reset;
3947 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3948 WREG32(GRBM_SOFT_RESET, tmp);
3949 tmp = RREG32(GRBM_SOFT_RESET);
3950
3951 udelay(50);
3952
3953 tmp &= ~grbm_soft_reset;
3954 WREG32(GRBM_SOFT_RESET, tmp);
3955 tmp = RREG32(GRBM_SOFT_RESET);
3956 }
3957
3958 if (srbm_soft_reset) {
3959 tmp = RREG32(SRBM_SOFT_RESET);
3960 tmp |= srbm_soft_reset;
3961 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3962 WREG32(SRBM_SOFT_RESET, tmp);
3963 tmp = RREG32(SRBM_SOFT_RESET);
3964
3965 udelay(50);
3966
3967 tmp &= ~srbm_soft_reset;
3968 WREG32(SRBM_SOFT_RESET, tmp);
3969 tmp = RREG32(SRBM_SOFT_RESET);
3970 }
3971
3972 /* Wait a little for things to settle down */
3973 udelay(50);
3974
3975 evergreen_mc_resume(rdev, &save);
3976 udelay(50);
3977
3978 cik_print_gpu_status_regs(rdev);
3979}
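
/* Example (illustrative only): a hung gfx ring together with a busy SDMA0
 * would typically come back from cik_gpu_check_soft_reset() as
 *
 *	reset_mask = RADEON_RESET_GFX | RADEON_RESET_CP | RADEON_RESET_DMA;
 *
 * which the function above translates into SOFT_RESET_CP | SOFT_RESET_GFX
 * in GRBM_SOFT_RESET plus SOFT_RESET_GRBM and SOFT_RESET_SDMA in
 * SRBM_SOFT_RESET, asserted and then cleared with a 50us delay in between.
 */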
3980
3981/**
3982 * cik_asic_reset - soft reset GPU
3983 *
3984 * @rdev: radeon_device pointer
3985 *
3986 * Look up which blocks are hung and attempt
3987 * to reset them.
3988 * Returns 0 for success.
3989 */
3990int cik_asic_reset(struct radeon_device *rdev)
3991{
3992 u32 reset_mask;
3993
3994 reset_mask = cik_gpu_check_soft_reset(rdev);
3995
3996 if (reset_mask)
3997 r600_set_bios_scratch_engine_hung(rdev, true);
3998
3999 cik_gpu_soft_reset(rdev, reset_mask);
4000
4001 reset_mask = cik_gpu_check_soft_reset(rdev);
4002
4003 if (!reset_mask)
4004 r600_set_bios_scratch_engine_hung(rdev, false);
4005
4006 return 0;
4007}
4008
4009/**
4010 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04004011 *
4012 * @rdev: radeon_device pointer
4013 * @ring: radeon_ring structure holding ring information
4014 *
4015 * Check if the 3D engine is locked up (CIK).
4016 * Returns true if the engine is locked, false if not.
4017 */
Alex Deuchercc066712013-04-09 12:59:51 -04004018bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04004019{
Alex Deuchercc066712013-04-09 12:59:51 -04004020 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04004021
Alex Deuchercc066712013-04-09 12:59:51 -04004022 if (!(reset_mask & (RADEON_RESET_GFX |
4023 RADEON_RESET_COMPUTE |
4024 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04004025 radeon_ring_lockup_update(ring);
4026 return false;
4027 }
4028 /* force CP activities */
4029 radeon_ring_force_activity(rdev, ring);
4030 return radeon_ring_test_lockup(rdev, ring);
4031}
4032
4033/**
Alex Deucher21a93e12013-04-09 12:47:11 -04004034 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4035 *
4036 * @rdev: radeon_device pointer
4037 * @ring: radeon_ring structure holding ring information
4038 *
4039 * Check if the async DMA engine is locked up (CIK).
4040 * Returns true if the engine appears to be locked up, false if not.
4041 */
4042bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4043{
Alex Deuchercc066712013-04-09 12:59:51 -04004044 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4045 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04004046
4047 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04004048 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04004049 else
Alex Deuchercc066712013-04-09 12:59:51 -04004050 mask = RADEON_RESET_DMA1;
4051
4052 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04004053 radeon_ring_lockup_update(ring);
4054 return false;
4055 }
4056 /* force ring activities */
4057 radeon_ring_force_activity(rdev, ring);
4058 return radeon_ring_test_lockup(rdev, ring);
4059}
4060
Alex Deucher1c491652013-04-09 12:45:26 -04004061/* MC */
4062/**
4063 * cik_mc_program - program the GPU memory controller
4064 *
4065 * @rdev: radeon_device pointer
4066 *
4067 * Set the location of vram, gart, and AGP in the GPU's
4068 * physical address space (CIK).
4069 */
4070static void cik_mc_program(struct radeon_device *rdev)
4071{
4072 struct evergreen_mc_save save;
4073 u32 tmp;
4074 int i, j;
4075
4076 /* Initialize HDP */
4077 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4078 WREG32((0x2c14 + j), 0x00000000);
4079 WREG32((0x2c18 + j), 0x00000000);
4080 WREG32((0x2c1c + j), 0x00000000);
4081 WREG32((0x2c20 + j), 0x00000000);
4082 WREG32((0x2c24 + j), 0x00000000);
4083 }
4084 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4085
4086 evergreen_mc_stop(rdev, &save);
4087 if (radeon_mc_wait_for_idle(rdev)) {
4088		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4089 }
4090 /* Lockout access through VGA aperture*/
4091 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4092 /* Update configuration */
4093 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4094 rdev->mc.vram_start >> 12);
4095 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4096 rdev->mc.vram_end >> 12);
4097 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4098 rdev->vram_scratch.gpu_addr >> 12);
4099 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4100 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4101 WREG32(MC_VM_FB_LOCATION, tmp);
4102 /* XXX double check these! */
4103 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4104 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4105 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4106 WREG32(MC_VM_AGP_BASE, 0);
4107 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4108 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4109 if (radeon_mc_wait_for_idle(rdev)) {
4110		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4111 }
4112 evergreen_mc_resume(rdev, &save);
4113 /* we need to own VRAM, so turn off the VGA renderer here
4114 * to stop it overwriting our objects */
4115 rv515_vga_render_disable(rdev);
4116}
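
/* Worked example (illustrative only): MC_VM_FB_LOCATION takes the VRAM
 * bounds in 16MB units, bottom in the low 16 bits and top in the high 16
 * bits.  Assuming a hypothetical 256MB framebuffer at GPU address 0:
 *
 *	vram_start = 0x00000000, vram_end = 0x0FFFFFFF
 *	tmp  = ((0x0FFFFFFF >> 24) & 0xFFFF) << 16;	// 0x000F0000
 *	tmp |= ((0x00000000 >> 24) & 0xFFFF);		// bottom = 0
 *	MC_VM_FB_LOCATION = 0x000F0000
 */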
4117
4118/**
4119 * cik_mc_init - initialize the memory controller driver params
4120 *
4121 * @rdev: radeon_device pointer
4122 *
4123 * Look up the amount of vram, vram width, and decide how to place
4124 * vram and gart within the GPU's physical address space (CIK).
4125 * Returns 0 for success.
4126 */
4127static int cik_mc_init(struct radeon_device *rdev)
4128{
4129 u32 tmp;
4130 int chansize, numchan;
4131
4132	/* Get VRAM information */
4133 rdev->mc.vram_is_ddr = true;
4134 tmp = RREG32(MC_ARB_RAMCFG);
4135 if (tmp & CHANSIZE_MASK) {
4136 chansize = 64;
4137 } else {
4138 chansize = 32;
4139 }
4140 tmp = RREG32(MC_SHARED_CHMAP);
4141 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4142 case 0:
4143 default:
4144 numchan = 1;
4145 break;
4146 case 1:
4147 numchan = 2;
4148 break;
4149 case 2:
4150 numchan = 4;
4151 break;
4152 case 3:
4153 numchan = 8;
4154 break;
4155 case 4:
4156 numchan = 3;
4157 break;
4158 case 5:
4159 numchan = 6;
4160 break;
4161 case 6:
4162 numchan = 10;
4163 break;
4164 case 7:
4165 numchan = 12;
4166 break;
4167 case 8:
4168 numchan = 16;
4169 break;
4170 }
4171 rdev->mc.vram_width = numchan * chansize;
4172	/* Could aperture size report 0? */
4173 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4174 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4175 /* size in MB on si */
4176 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4177 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4178 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4179 si_vram_gtt_location(rdev, &rdev->mc);
4180 radeon_update_bandwidth_info(rdev);
4181
4182 return 0;
4183}
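
/* Worked example (illustrative only): with a hypothetical MC_SHARED_CHMAP
 * NOOFCHAN field of 3 (8 channels) and the CHANSIZE bit set in
 * MC_ARB_RAMCFG (64-bit channels), the bus width works out to
 *
 *	vram_width = numchan * chansize = 8 * 64 = 512 bits
 */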
4184
4185/*
4186 * GART
4187 * VMID 0 is used for the physical GPU addresses as seen by the kernel.
4188 * VMIDs 1-15 are used for userspace clients and are handled
4189 * by the radeon vm/hsa code.
4190 */
4191/**
4192 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4193 *
4194 * @rdev: radeon_device pointer
4195 *
4196 * Flush the TLB for the VMID 0 page table (CIK).
4197 */
4198void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4199{
4200 /* flush hdp cache */
4201 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4202
4203 /* bits 0-15 are the VM contexts0-15 */
4204 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4205}
4206
4207/**
4208 * cik_pcie_gart_enable - gart enable
4209 *
4210 * @rdev: radeon_device pointer
4211 *
4212 * This sets up the TLBs, programs the page tables for VMID0,
4213 * sets up the hw for VMIDs 1-15 which are allocated on
4214 * demand, and sets up the global locations for the LDS, GDS,
4215 * and GPUVM for FSA64 clients (CIK).
4216 * Returns 0 for success, errors for failure.
4217 */
4218static int cik_pcie_gart_enable(struct radeon_device *rdev)
4219{
4220 int r, i;
4221
4222 if (rdev->gart.robj == NULL) {
4223 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4224 return -EINVAL;
4225 }
4226 r = radeon_gart_table_vram_pin(rdev);
4227 if (r)
4228 return r;
4229 radeon_gart_restore(rdev);
4230 /* Setup TLB control */
4231 WREG32(MC_VM_MX_L1_TLB_CNTL,
4232 (0xA << 7) |
4233 ENABLE_L1_TLB |
4234 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4235 ENABLE_ADVANCED_DRIVER_MODEL |
4236 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4237 /* Setup L2 cache */
4238 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4239 ENABLE_L2_FRAGMENT_PROCESSING |
4240 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4241 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4242 EFFECTIVE_L2_QUEUE_SIZE(7) |
4243 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4244 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4245 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4246 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4247 /* setup context0 */
4248 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4249 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4250 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4251 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4252 (u32)(rdev->dummy_page.addr >> 12));
4253 WREG32(VM_CONTEXT0_CNTL2, 0);
4254 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4255 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4256
4257 WREG32(0x15D4, 0);
4258 WREG32(0x15D8, 0);
4259 WREG32(0x15DC, 0);
4260
4261 /* empty context1-15 */
4262 /* FIXME start with 4G, once using 2 level pt switch to full
4263 * vm size space
4264 */
4265 /* set vm size, must be a multiple of 4 */
4266 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4267 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4268 for (i = 1; i < 16; i++) {
4269 if (i < 8)
4270 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4271 rdev->gart.table_addr >> 12);
4272 else
4273 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4274 rdev->gart.table_addr >> 12);
4275 }
4276
4277 /* enable context1-15 */
4278 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4279 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04004280 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04004281 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04004282 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4283 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4284 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4285 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4286 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4287 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4288 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4289 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4290 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4291 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4292 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4293 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04004294
4295 /* TC cache setup ??? */
4296 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4297 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4298 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4299
4300 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4301 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4302 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4303 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4304 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4305
4306 WREG32(TC_CFG_L1_VOLATILE, 0);
4307 WREG32(TC_CFG_L2_VOLATILE, 0);
4308
4309 if (rdev->family == CHIP_KAVERI) {
4310 u32 tmp = RREG32(CHUB_CONTROL);
4311 tmp &= ~BYPASS_VM;
4312 WREG32(CHUB_CONTROL, tmp);
4313 }
4314
4315 /* XXX SH_MEM regs */
4316 /* where to put LDS, scratch, GPUVM in FSA64 space */
Alex Deucherf61d5b462013-08-06 12:40:16 -04004317 mutex_lock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04004318 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05004319 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04004320 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04004321 WREG32(SH_MEM_CONFIG, 0);
4322 WREG32(SH_MEM_APE1_BASE, 1);
4323 WREG32(SH_MEM_APE1_LIMIT, 0);
4324 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04004325 /* SDMA GFX */
4326 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4327 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4328 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4329 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4330 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04004331 }
Alex Deucherb556b122013-01-29 10:44:22 -05004332 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04004333 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04004334
4335 cik_pcie_gart_tlb_flush(rdev);
4336 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4337 (unsigned)(rdev->mc.gtt_size >> 20),
4338 (unsigned long long)rdev->gart.table_addr);
4339 rdev->gart.ready = true;
4340 return 0;
4341}
4342
4343/**
4344 * cik_pcie_gart_disable - gart disable
4345 *
4346 * @rdev: radeon_device pointer
4347 *
4348 * This disables all VM page tables (CIK).
4349 */
4350static void cik_pcie_gart_disable(struct radeon_device *rdev)
4351{
4352 /* Disable all tables */
4353 WREG32(VM_CONTEXT0_CNTL, 0);
4354 WREG32(VM_CONTEXT1_CNTL, 0);
4355 /* Setup TLB control */
4356 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4357 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4358 /* Setup L2 cache */
4359 WREG32(VM_L2_CNTL,
4360 ENABLE_L2_FRAGMENT_PROCESSING |
4361 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4362 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4363 EFFECTIVE_L2_QUEUE_SIZE(7) |
4364 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4365 WREG32(VM_L2_CNTL2, 0);
4366 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4367 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4368 radeon_gart_table_vram_unpin(rdev);
4369}
4370
4371/**
4372 * cik_pcie_gart_fini - vm fini callback
4373 *
4374 * @rdev: radeon_device pointer
4375 *
4376 * Tears down the driver GART/VM setup (CIK).
4377 */
4378static void cik_pcie_gart_fini(struct radeon_device *rdev)
4379{
4380 cik_pcie_gart_disable(rdev);
4381 radeon_gart_table_vram_free(rdev);
4382 radeon_gart_fini(rdev);
4383}
4384
4385/* vm parser */
4386/**
4387 * cik_ib_parse - vm ib_parse callback
4388 *
4389 * @rdev: radeon_device pointer
4390 * @ib: indirect buffer pointer
4391 *
4392 * CIK uses hw IB checking so this is a nop (CIK).
4393 */
4394int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4395{
4396 return 0;
4397}
4398
4399/*
4400 * vm
4401 * VMID 0 is used for the physical GPU addresses as seen by the kernel.
4402 * VMIDs 1-15 are used for userspace clients and are handled
4403 * by the radeon vm/hsa code.
4404 */
4405/**
4406 * cik_vm_init - cik vm init callback
4407 *
4408 * @rdev: radeon_device pointer
4409 *
4410 * Inits cik specific vm parameters (number of VMs, base of vram for
4411 * VMIDs 1-15) (CIK).
4412 * Returns 0 for success.
4413 */
4414int cik_vm_init(struct radeon_device *rdev)
4415{
4416 /* number of VMs */
4417 rdev->vm_manager.nvm = 16;
4418 /* base offset of vram pages */
4419 if (rdev->flags & RADEON_IS_IGP) {
4420 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4421 tmp <<= 22;
4422 rdev->vm_manager.vram_base_offset = tmp;
4423 } else
4424 rdev->vm_manager.vram_base_offset = 0;
4425
4426 return 0;
4427}
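
/* Worked example (illustrative only): MC_VM_FB_OFFSET is in 4MB units
 * (hence the shift by 22 above), so a hypothetical readback of 0x300 on
 * an IGP gives
 *
 *	vram_base_offset = 0x300ULL << 22 = 0xC0000000	(3GB)
 *
 * while discrete parts leave the offset at 0.
 */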
4428
4429/**
4430 * cik_vm_fini - cik vm fini callback
4431 *
4432 * @rdev: radeon_device pointer
4433 *
4434 * Tear down any asic specific VM setup (CIK).
4435 */
4436void cik_vm_fini(struct radeon_device *rdev)
4437{
4438}
4439
Alex Deucherf96ab482012-08-31 10:37:47 -04004440/**
Alex Deucher3ec7d112013-06-14 10:42:22 -04004441 * cik_vm_decode_fault - print human readable fault info
4442 *
4443 * @rdev: radeon_device pointer
4444 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4445 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4446 *
4447 * Print human readable fault information (CIK).
4448 */
4449static void cik_vm_decode_fault(struct radeon_device *rdev,
4450 u32 status, u32 addr, u32 mc_client)
4451{
4452 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4453 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4454 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4455 char *block = (char *)&mc_client;
4456
4457 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4458 protections, vmid, addr,
4459 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4460 block, mc_id);
4461}
4462
4463/**
Alex Deucherf96ab482012-08-31 10:37:47 -04004464 * cik_vm_flush - cik vm flush using the CP
4465 *
4466 * @rdev: radeon_device pointer
4467 *
4468 * Update the page table base and flush the VM TLB
4469 * using the CP (CIK).
4470 */
4471void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4472{
4473 struct radeon_ring *ring = &rdev->ring[ridx];
4474
4475 if (vm == NULL)
4476 return;
4477
4478 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4479 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4480 WRITE_DATA_DST_SEL(0)));
4481 if (vm->id < 8) {
4482 radeon_ring_write(ring,
4483 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4484 } else {
4485 radeon_ring_write(ring,
4486 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4487 }
4488 radeon_ring_write(ring, 0);
4489 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4490
4491 /* update SH_MEM_* regs */
4492 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4493 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4494 WRITE_DATA_DST_SEL(0)));
4495 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4496 radeon_ring_write(ring, 0);
4497 radeon_ring_write(ring, VMID(vm->id));
4498
4499 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4500 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4501 WRITE_DATA_DST_SEL(0)));
4502 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4503 radeon_ring_write(ring, 0);
4504
4505 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4506 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4507 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4508 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4509
4510 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4511 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4512 WRITE_DATA_DST_SEL(0)));
4513 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4514 radeon_ring_write(ring, 0);
4515 radeon_ring_write(ring, VMID(0));
4516
4517 /* HDP flush */
4518 /* We should be using the WAIT_REG_MEM packet here like in
4519 * cik_fence_ring_emit(), but it causes the CP to hang in this
4520 * context...
4521 */
4522 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4523 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4524 WRITE_DATA_DST_SEL(0)));
4525 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4526 radeon_ring_write(ring, 0);
4527 radeon_ring_write(ring, 0);
4528
4529 /* bits 0-15 are the VM contexts0-15 */
4530 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4531 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4532 WRITE_DATA_DST_SEL(0)));
4533 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4534 radeon_ring_write(ring, 0);
4535 radeon_ring_write(ring, 1 << vm->id);
4536
Alex Deucherb07fdd32013-04-11 09:36:17 -04004537 /* compute doesn't have PFP */
4538 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4539 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4540 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4541 radeon_ring_write(ring, 0x0);
4542 }
Alex Deucherf96ab482012-08-31 10:37:47 -04004543}
4544
Alex Deucher605de6b2012-10-22 13:04:03 -04004545/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04004546 * cik_vm_set_page - update the page tables using CP or sDMA
4547 *
4548 * @rdev: radeon_device pointer
4549 * @ib: indirect buffer to fill with commands
4550 * @pe: addr of the page entry
4551 * @addr: dst addr to write into pe
4552 * @count: number of page entries to update
4553 * @incr: increase next addr by incr bytes
4554 * @flags: access flags
4555 *
4556 * Update the page tables using CP or sDMA (CIK).
4557 */
4558void cik_vm_set_page(struct radeon_device *rdev,
4559 struct radeon_ib *ib,
4560 uint64_t pe,
4561 uint64_t addr, unsigned count,
4562 uint32_t incr, uint32_t flags)
4563{
4564 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4565 uint64_t value;
4566 unsigned ndw;
4567
4568 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4569 /* CP */
4570 while (count) {
4571 ndw = 2 + count * 2;
4572 if (ndw > 0x3FFE)
4573 ndw = 0x3FFE;
4574
4575 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4576 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4577 WRITE_DATA_DST_SEL(1));
4578 ib->ptr[ib->length_dw++] = pe;
4579 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4580 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4581 if (flags & RADEON_VM_PAGE_SYSTEM) {
4582 value = radeon_vm_map_gart(rdev, addr);
4583 value &= 0xFFFFFFFFFFFFF000ULL;
4584 } else if (flags & RADEON_VM_PAGE_VALID) {
4585 value = addr;
4586 } else {
4587 value = 0;
4588 }
4589 addr += incr;
4590 value |= r600_flags;
4591 ib->ptr[ib->length_dw++] = value;
4592 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4593 }
4594 }
4595 } else {
4596 /* DMA */
4597 if (flags & RADEON_VM_PAGE_SYSTEM) {
4598 while (count) {
4599 ndw = count * 2;
4600 if (ndw > 0xFFFFE)
4601 ndw = 0xFFFFE;
4602
4603 /* for non-physically contiguous pages (system) */
4604 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4605 ib->ptr[ib->length_dw++] = pe;
4606 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4607 ib->ptr[ib->length_dw++] = ndw;
4608 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4609 if (flags & RADEON_VM_PAGE_SYSTEM) {
4610 value = radeon_vm_map_gart(rdev, addr);
4611 value &= 0xFFFFFFFFFFFFF000ULL;
4612 } else if (flags & RADEON_VM_PAGE_VALID) {
4613 value = addr;
4614 } else {
4615 value = 0;
4616 }
4617 addr += incr;
4618 value |= r600_flags;
4619 ib->ptr[ib->length_dw++] = value;
4620 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4621 }
4622 }
4623 } else {
4624 while (count) {
4625 ndw = count;
4626 if (ndw > 0x7FFFF)
4627 ndw = 0x7FFFF;
4628
4629 if (flags & RADEON_VM_PAGE_VALID)
4630 value = addr;
4631 else
4632 value = 0;
4633 /* for physically contiguous pages (vram) */
4634 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4635 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4636 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4637 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4638 ib->ptr[ib->length_dw++] = 0;
4639 ib->ptr[ib->length_dw++] = value; /* value */
4640 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4641 ib->ptr[ib->length_dw++] = incr; /* increment size */
4642 ib->ptr[ib->length_dw++] = 0;
4643 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4644 pe += ndw * 8;
4645 addr += ndw * incr;
4646 count -= ndw;
4647 }
4648 }
4649 while (ib->length_dw & 0x7)
4650 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4651 }
4652}
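
/* Sizing sketch (illustrative only): on the sDMA path every system page
 * costs two payload dwords in a WRITE_LINEAR packet (PTE low + high),
 * whereas a contiguous VRAM range is covered by one 10-dword
 * GENERATE_PTE_PDE packet for up to 0x7FFFF entries.  Assuming a
 * hypothetical run of 0x90000 contiguous entries:
 *
 *	packet 1: ndw = 0x7FFFF entries
 *	packet 2: ndw = 0x90000 - 0x7FFFF = 0x10001 entries
 *
 * i.e. two 10-dword packets instead of roughly 0x120000 inline PTE dwords.
 */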
4653
4654/**
Alex Deucher605de6b2012-10-22 13:04:03 -04004655 * cik_dma_vm_flush - cik vm flush using sDMA
4656 *
4657 * @rdev: radeon_device pointer
4658 *
4659 * Update the page table base and flush the VM TLB
4660 * using sDMA (CIK).
4661 */
4662void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4663{
4664 struct radeon_ring *ring = &rdev->ring[ridx];
4665 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4666 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4667 u32 ref_and_mask;
4668
4669 if (vm == NULL)
4670 return;
4671
4672 if (ridx == R600_RING_TYPE_DMA_INDEX)
4673 ref_and_mask = SDMA0;
4674 else
4675 ref_and_mask = SDMA1;
4676
4677 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4678 if (vm->id < 8) {
4679 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4680 } else {
4681 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4682 }
4683 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4684
4685 /* update SH_MEM_* regs */
4686 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4687 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4688 radeon_ring_write(ring, VMID(vm->id));
4689
4690 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4691 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4692 radeon_ring_write(ring, 0);
4693
4694 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4696 radeon_ring_write(ring, 0);
4697
4698 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4700 radeon_ring_write(ring, 1);
4701
4702 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4703 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4704 radeon_ring_write(ring, 0);
4705
4706 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4707 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4708 radeon_ring_write(ring, VMID(0));
4709
4710 /* flush HDP */
4711 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4712 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4713 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4714 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4715 radeon_ring_write(ring, ref_and_mask); /* MASK */
4716 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4717
4718 /* flush TLB */
4719 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4720 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4721 radeon_ring_write(ring, 1 << vm->id);
4722}
4723
Alex Deucherf6796ca2012-11-09 10:44:08 -05004724/*
4725 * RLC
4726 * The RLC is a multi-purpose microengine that handles a
4727 * variety of functions, the most important of which is
4728 * the interrupt controller.
4729 */
4730/**
4731 * cik_rlc_stop - stop the RLC ME
4732 *
4733 * @rdev: radeon_device pointer
4734 *
4735 * Halt the RLC ME (MicroEngine) (CIK).
4736 */
4737static void cik_rlc_stop(struct radeon_device *rdev)
4738{
4739 int i, j, k;
4740 u32 mask, tmp;
4741
4742 tmp = RREG32(CP_INT_CNTL_RING0);
4743 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4744 WREG32(CP_INT_CNTL_RING0, tmp);
4745
4746 RREG32(CB_CGTT_SCLK_CTRL);
4747 RREG32(CB_CGTT_SCLK_CTRL);
4748 RREG32(CB_CGTT_SCLK_CTRL);
4749 RREG32(CB_CGTT_SCLK_CTRL);
4750
4751 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4752 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4753
4754 WREG32(RLC_CNTL, 0);
4755
4756 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4757 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4758 cik_select_se_sh(rdev, i, j);
4759 for (k = 0; k < rdev->usec_timeout; k++) {
4760 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4761 break;
4762 udelay(1);
4763 }
4764 }
4765 }
4766 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4767
4768 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4769 for (k = 0; k < rdev->usec_timeout; k++) {
4770 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4771 break;
4772 udelay(1);
4773 }
4774}
4775
4776/**
4777 * cik_rlc_start - start the RLC ME
4778 *
4779 * @rdev: radeon_device pointer
4780 *
4781 * Unhalt the RLC ME (MicroEngine) (CIK).
4782 */
4783static void cik_rlc_start(struct radeon_device *rdev)
4784{
4785 u32 tmp;
4786
4787 WREG32(RLC_CNTL, RLC_ENABLE);
4788
4789 tmp = RREG32(CP_INT_CNTL_RING0);
4790 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4791 WREG32(CP_INT_CNTL_RING0, tmp);
4792
4793 udelay(50);
4794}
4795
4796/**
4797 * cik_rlc_resume - setup the RLC hw
4798 *
4799 * @rdev: radeon_device pointer
4800 *
4801 * Initialize the RLC registers, load the ucode,
4802 * and start the RLC (CIK).
4803 * Returns 0 for success, -EINVAL if the ucode is not available.
4804 */
4805static int cik_rlc_resume(struct radeon_device *rdev)
4806{
4807 u32 i, size;
4808 u32 clear_state_info[3];
4809 const __be32 *fw_data;
4810
4811 if (!rdev->rlc_fw)
4812 return -EINVAL;
4813
4814 switch (rdev->family) {
4815 case CHIP_BONAIRE:
4816 default:
4817 size = BONAIRE_RLC_UCODE_SIZE;
4818 break;
4819 case CHIP_KAVERI:
4820 size = KV_RLC_UCODE_SIZE;
4821 break;
4822 case CHIP_KABINI:
4823 size = KB_RLC_UCODE_SIZE;
4824 break;
4825 }
4826
4827 cik_rlc_stop(rdev);
4828
4829 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4830 RREG32(GRBM_SOFT_RESET);
4831 udelay(50);
4832 WREG32(GRBM_SOFT_RESET, 0);
4833 RREG32(GRBM_SOFT_RESET);
4834 udelay(50);
4835
4836 WREG32(RLC_LB_CNTR_INIT, 0);
4837 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4838
4839 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4840 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4841 WREG32(RLC_LB_PARAMS, 0x00600408);
4842 WREG32(RLC_LB_CNTL, 0x80000004);
4843
4844 WREG32(RLC_MC_CNTL, 0);
4845 WREG32(RLC_UCODE_CNTL, 0);
4846
4847 fw_data = (const __be32 *)rdev->rlc_fw->data;
4848 WREG32(RLC_GPM_UCODE_ADDR, 0);
4849 for (i = 0; i < size; i++)
4850 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4851 WREG32(RLC_GPM_UCODE_ADDR, 0);
4852
4853 /* XXX */
4854 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4855 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4856 clear_state_info[2] = 0;//cik_default_size;
4857 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4858 for (i = 0; i < 3; i++)
4859 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4860 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4861
4862 cik_rlc_start(rdev);
4863
4864 return 0;
4865}
Alex Deuchera59781b2012-11-09 10:45:57 -05004866
4867/*
4868 * Interrupts
4869 * Starting with r6xx, interrupts are handled via a ring buffer.
4870 * Ring buffers are areas of GPU accessible memory that the GPU
4871 * writes interrupt vectors into and the host reads vectors out of.
4872 * There is a rptr (read pointer) that determines where the
4873 * host is currently reading, and a wptr (write pointer)
4874 * which determines where the GPU has written. When the
4875 * pointers are equal, the ring is idle. When the GPU
4876 * writes vectors to the ring buffer, it increments the
4877 * wptr. When there is an interrupt, the host then starts
4878 * fetching commands and processing them until the pointers are
4879 * equal again at which point it updates the rptr.
4880 */
4881
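/* Host-side sketch (illustrative, simplified from the radeon IRQ handler):
 * the host drains 16-byte vectors until rptr catches up with the wptr the
 * GPU advanced, then writes the new rptr back so the space can be reused.
 * Assuming wptr has already been read from the writeback slot:
 *
 *	while (rptr != wptr) {
 *		u32 src_id = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;
 *		// ... dispatch on src_id (vblank, hpd, CP, SDMA traps, ...)
 *		rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */
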
4882/**
4883 * cik_enable_interrupts - Enable the interrupt ring buffer
4884 *
4885 * @rdev: radeon_device pointer
4886 *
4887 * Enable the interrupt ring buffer (CIK).
4888 */
4889static void cik_enable_interrupts(struct radeon_device *rdev)
4890{
4891 u32 ih_cntl = RREG32(IH_CNTL);
4892 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4893
4894 ih_cntl |= ENABLE_INTR;
4895 ih_rb_cntl |= IH_RB_ENABLE;
4896 WREG32(IH_CNTL, ih_cntl);
4897 WREG32(IH_RB_CNTL, ih_rb_cntl);
4898 rdev->ih.enabled = true;
4899}
4900
4901/**
4902 * cik_disable_interrupts - Disable the interrupt ring buffer
4903 *
4904 * @rdev: radeon_device pointer
4905 *
4906 * Disable the interrupt ring buffer (CIK).
4907 */
4908static void cik_disable_interrupts(struct radeon_device *rdev)
4909{
4910 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4911 u32 ih_cntl = RREG32(IH_CNTL);
4912
4913 ih_rb_cntl &= ~IH_RB_ENABLE;
4914 ih_cntl &= ~ENABLE_INTR;
4915 WREG32(IH_RB_CNTL, ih_rb_cntl);
4916 WREG32(IH_CNTL, ih_cntl);
4917 /* set rptr, wptr to 0 */
4918 WREG32(IH_RB_RPTR, 0);
4919 WREG32(IH_RB_WPTR, 0);
4920 rdev->ih.enabled = false;
4921 rdev->ih.rptr = 0;
4922}
4923
4924/**
4925 * cik_disable_interrupt_state - Disable all interrupt sources
4926 *
4927 * @rdev: radeon_device pointer
4928 *
4929 * Clear all interrupt enable bits used by the driver (CIK).
4930 */
4931static void cik_disable_interrupt_state(struct radeon_device *rdev)
4932{
4933 u32 tmp;
4934
4935 /* gfx ring */
4936 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04004937 /* sdma */
4938 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4939 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4940 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4941 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05004942 /* compute queues */
4943 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4944 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4945 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4946 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4947 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4948 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4949 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4950 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4951 /* grbm */
4952 WREG32(GRBM_INT_CNTL, 0);
4953 /* vline/vblank, etc. */
4954 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4955 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4956 if (rdev->num_crtc >= 4) {
4957 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4958 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4959 }
4960 if (rdev->num_crtc >= 6) {
4961 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4962 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4963 }
4964
4965 /* dac hotplug */
4966 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4967
4968 /* digital hotplug */
4969 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4970 WREG32(DC_HPD1_INT_CONTROL, tmp);
4971 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4972 WREG32(DC_HPD2_INT_CONTROL, tmp);
4973 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4974 WREG32(DC_HPD3_INT_CONTROL, tmp);
4975 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4976 WREG32(DC_HPD4_INT_CONTROL, tmp);
4977 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4978 WREG32(DC_HPD5_INT_CONTROL, tmp);
4979 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4980 WREG32(DC_HPD6_INT_CONTROL, tmp);
4981
4982}
4983
4984/**
4985 * cik_irq_init - init and enable the interrupt ring
4986 *
4987 * @rdev: radeon_device pointer
4988 *
4989 * Allocate a ring buffer for the interrupt controller,
4990 * enable the RLC, disable interrupts, set up the IH
4991 * ring buffer, and enable it (CIK).
4992 * Called at device load and resume.
4993 * Returns 0 for success, errors for failure.
4994 */
4995static int cik_irq_init(struct radeon_device *rdev)
4996{
4997 int ret = 0;
4998 int rb_bufsz;
4999 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5000
5001 /* allocate ring */
5002 ret = r600_ih_ring_alloc(rdev);
5003 if (ret)
5004 return ret;
5005
5006 /* disable irqs */
5007 cik_disable_interrupts(rdev);
5008
5009 /* init rlc */
5010 ret = cik_rlc_resume(rdev);
5011 if (ret) {
5012 r600_ih_ring_fini(rdev);
5013 return ret;
5014 }
5015
5016 /* setup interrupt control */
5017 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5018 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5019 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5020 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5021 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5022 */
5023 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5024 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5025 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5026 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5027
5028 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5029 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5030
5031 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5032 IH_WPTR_OVERFLOW_CLEAR |
5033 (rb_bufsz << 1));
5034
5035 if (rdev->wb.enabled)
5036 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5037
5038 /* set the writeback address whether it's enabled or not */
5039 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5040 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5041
5042 WREG32(IH_RB_CNTL, ih_rb_cntl);
5043
5044 /* set rptr, wptr to 0 */
5045 WREG32(IH_RB_RPTR, 0);
5046 WREG32(IH_RB_WPTR, 0);
5047
5048 /* Default settings for IH_CNTL (disabled at first) */
5049 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5050 /* RPTR_REARM only works if msi's are enabled */
5051 if (rdev->msi_enabled)
5052 ih_cntl |= RPTR_REARM;
5053 WREG32(IH_CNTL, ih_cntl);
5054
5055 /* force the active interrupt state to all disabled */
5056 cik_disable_interrupt_state(rdev);
5057
5058 pci_set_master(rdev->pdev);
5059
5060 /* enable irqs */
5061 cik_enable_interrupts(rdev);
5062
5063 return ret;
5064}
5065
5066/**
5067 * cik_irq_set - enable/disable interrupt sources
5068 *
5069 * @rdev: radeon_device pointer
5070 *
5071 * Enable interrupt sources on the GPU (vblanks, hpd,
5072 * etc.) (CIK).
5073 * Returns 0 for success, errors for failure.
5074 */
5075int cik_irq_set(struct radeon_device *rdev)
5076{
5077 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5078 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04005079 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5080 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05005081 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5082 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5083 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04005084 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05005085
5086 if (!rdev->irq.installed) {
5087 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5088 return -EINVAL;
5089 }
5090 /* don't enable anything if the ih is disabled */
5091 if (!rdev->ih.enabled) {
5092 cik_disable_interrupts(rdev);
5093 /* force the active interrupt state to all disabled */
5094 cik_disable_interrupt_state(rdev);
5095 return 0;
5096 }
5097
5098 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5099 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5100 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5101 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5102 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5103 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5104
Alex Deucher21a93e12013-04-09 12:47:11 -04005105 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5106 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5107
Alex Deucher2b0781a2013-04-09 14:26:16 -04005108 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5109 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5110 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5111 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5112 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5113 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5114 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5115 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5116
Alex Deuchera59781b2012-11-09 10:45:57 -05005117 /* enable CP interrupts on all rings */
5118 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5119 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5120 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5121 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04005122 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5123 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5124 DRM_DEBUG("si_irq_set: sw int cp1\n");
5125 if (ring->me == 1) {
5126 switch (ring->pipe) {
5127 case 0:
5128 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5129 break;
5130 case 1:
5131 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5132 break;
5133 case 2:
5134 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5135 break;
5136 case 3:
5137 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5138 break;
5139 default:
5140 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5141 break;
5142 }
5143 } else if (ring->me == 2) {
5144 switch (ring->pipe) {
5145 case 0:
5146 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5147 break;
5148 case 1:
5149 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5150 break;
5151 case 2:
5152 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5153 break;
5154 case 3:
5155 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5156 break;
5157 default:
5158 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5159 break;
5160 }
5161 } else {
5162 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5163 }
5164 }
5165 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5166 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5167 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
5168 if (ring->me == 1) {
5169 switch (ring->pipe) {
5170 case 0:
5171 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5172 break;
5173 case 1:
5174 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5175 break;
5176 case 2:
5177 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5178 break;
5179 case 3:
5180 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5181 break;
5182 default:
5183 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5184 break;
5185 }
5186 } else if (ring->me == 2) {
5187 switch (ring->pipe) {
5188 case 0:
5189 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5190 break;
5191 case 1:
5192 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5193 break;
5194 case 2:
5195 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5196 break;
5197 case 3:
5198 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5199 break;
5200 default:
5201 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5202 break;
5203 }
5204 } else {
5205 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5206 }
5207 }
Alex Deuchera59781b2012-11-09 10:45:57 -05005208
Alex Deucher21a93e12013-04-09 12:47:11 -04005209 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5210 DRM_DEBUG("cik_irq_set: sw int dma\n");
5211 dma_cntl |= TRAP_ENABLE;
5212 }
5213
5214 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5215 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5216 dma_cntl1 |= TRAP_ENABLE;
5217 }
5218
Alex Deuchera59781b2012-11-09 10:45:57 -05005219 if (rdev->irq.crtc_vblank_int[0] ||
5220 atomic_read(&rdev->irq.pflip[0])) {
5221 DRM_DEBUG("cik_irq_set: vblank 0\n");
5222 crtc1 |= VBLANK_INTERRUPT_MASK;
5223 }
5224 if (rdev->irq.crtc_vblank_int[1] ||
5225 atomic_read(&rdev->irq.pflip[1])) {
5226 DRM_DEBUG("cik_irq_set: vblank 1\n");
5227 crtc2 |= VBLANK_INTERRUPT_MASK;
5228 }
5229 if (rdev->irq.crtc_vblank_int[2] ||
5230 atomic_read(&rdev->irq.pflip[2])) {
5231 DRM_DEBUG("cik_irq_set: vblank 2\n");
5232 crtc3 |= VBLANK_INTERRUPT_MASK;
5233 }
5234 if (rdev->irq.crtc_vblank_int[3] ||
5235 atomic_read(&rdev->irq.pflip[3])) {
5236 DRM_DEBUG("cik_irq_set: vblank 3\n");
5237 crtc4 |= VBLANK_INTERRUPT_MASK;
5238 }
5239 if (rdev->irq.crtc_vblank_int[4] ||
5240 atomic_read(&rdev->irq.pflip[4])) {
5241 DRM_DEBUG("cik_irq_set: vblank 4\n");
5242 crtc5 |= VBLANK_INTERRUPT_MASK;
5243 }
5244 if (rdev->irq.crtc_vblank_int[5] ||
5245 atomic_read(&rdev->irq.pflip[5])) {
5246 DRM_DEBUG("cik_irq_set: vblank 5\n");
5247 crtc6 |= VBLANK_INTERRUPT_MASK;
5248 }
5249 if (rdev->irq.hpd[0]) {
5250 DRM_DEBUG("cik_irq_set: hpd 1\n");
5251 hpd1 |= DC_HPDx_INT_EN;
5252 }
5253 if (rdev->irq.hpd[1]) {
5254 DRM_DEBUG("cik_irq_set: hpd 2\n");
5255 hpd2 |= DC_HPDx_INT_EN;
5256 }
5257 if (rdev->irq.hpd[2]) {
5258 DRM_DEBUG("cik_irq_set: hpd 3\n");
5259 hpd3 |= DC_HPDx_INT_EN;
5260 }
5261 if (rdev->irq.hpd[3]) {
5262 DRM_DEBUG("cik_irq_set: hpd 4\n");
5263 hpd4 |= DC_HPDx_INT_EN;
5264 }
5265 if (rdev->irq.hpd[4]) {
5266 DRM_DEBUG("cik_irq_set: hpd 5\n");
5267 hpd5 |= DC_HPDx_INT_EN;
5268 }
5269 if (rdev->irq.hpd[5]) {
5270 DRM_DEBUG("cik_irq_set: hpd 6\n");
5271 hpd6 |= DC_HPDx_INT_EN;
5272 }
5273
5274 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5275
Alex Deucher21a93e12013-04-09 12:47:11 -04005276 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5277 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5278
Alex Deucher2b0781a2013-04-09 14:26:16 -04005279 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5280 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5281 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5282 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5283 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5284 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5285 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5286 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5287
Alex Deuchera59781b2012-11-09 10:45:57 -05005288 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5289
5290 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5291 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5292 if (rdev->num_crtc >= 4) {
5293 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5294 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5295 }
5296 if (rdev->num_crtc >= 6) {
5297 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5298 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5299 }
5300
5301 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5302 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5303 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5304 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5305 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5306 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5307
5308 return 0;
5309}
5310
5311/**
5312 * cik_irq_ack - ack interrupt sources
5313 *
5314 * @rdev: radeon_device pointer
5315 *
5316 * Ack interrupt sources on the GPU (vblanks, hpd,
5317 * etc.) (CIK). Certain interrupt sources are sw
5318 * generated and do not require an explicit ack.
5319 */
5320static inline void cik_irq_ack(struct radeon_device *rdev)
5321{
5322 u32 tmp;
5323
5324 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5325 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5326 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5327 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5328 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5329 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5330 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5331
5332 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5333 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5334 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5335 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5336 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5337 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5338 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5339 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5340
5341 if (rdev->num_crtc >= 4) {
5342 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5343 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5344 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5345 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5346 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5347 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5348 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5349 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5350 }
5351
5352 if (rdev->num_crtc >= 6) {
5353 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5354 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5355 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5356 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5357 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5358 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5359 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5360 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5361 }
5362
5363 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5364 tmp = RREG32(DC_HPD1_INT_CONTROL);
5365 tmp |= DC_HPDx_INT_ACK;
5366 WREG32(DC_HPD1_INT_CONTROL, tmp);
5367 }
5368 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5369 tmp = RREG32(DC_HPD2_INT_CONTROL);
5370 tmp |= DC_HPDx_INT_ACK;
5371 WREG32(DC_HPD2_INT_CONTROL, tmp);
5372 }
5373 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5374 tmp = RREG32(DC_HPD3_INT_CONTROL);
5375 tmp |= DC_HPDx_INT_ACK;
5376 WREG32(DC_HPD3_INT_CONTROL, tmp);
5377 }
5378 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5379 tmp = RREG32(DC_HPD4_INT_CONTROL);
5380 tmp |= DC_HPDx_INT_ACK;
5381 WREG32(DC_HPD4_INT_CONTROL, tmp);
5382 }
5383 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5384 tmp = RREG32(DC_HPD5_INT_CONTROL);
5385 tmp |= DC_HPDx_INT_ACK;
5386 WREG32(DC_HPD5_INT_CONTROL, tmp);
5387 }
5388 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5389 		tmp = RREG32(DC_HPD6_INT_CONTROL);
5390 tmp |= DC_HPDx_INT_ACK;
5391 WREG32(DC_HPD6_INT_CONTROL, tmp);
5392 }
5393}
5394
5395/**
5396 * cik_irq_disable - disable interrupts
5397 *
5398 * @rdev: radeon_device pointer
5399 *
5400 * Disable interrupts on the hw (CIK).
5401 */
5402static void cik_irq_disable(struct radeon_device *rdev)
5403{
5404 cik_disable_interrupts(rdev);
5405 /* Wait and acknowledge irq */
5406 mdelay(1);
5407 cik_irq_ack(rdev);
5408 cik_disable_interrupt_state(rdev);
5409}
5410
5411/**
5412 * cik_irq_suspend - disable interrupts for suspend
5413 *
5414 * @rdev: radeon_device pointer
5415 *
5416 * Disable interrupts and stop the RLC (CIK).
5417 * Used for suspend.
5418 */
5419static void cik_irq_suspend(struct radeon_device *rdev)
5420{
5421 cik_irq_disable(rdev);
5422 cik_rlc_stop(rdev);
5423}
5424
5425/**
5426 * cik_irq_fini - tear down interrupt support
5427 *
5428 * @rdev: radeon_device pointer
5429 *
5430 * Disable interrupts on the hw and free the IH ring
5431 * buffer (CIK).
5432 * Used for driver unload.
5433 */
5434static void cik_irq_fini(struct radeon_device *rdev)
5435{
5436 cik_irq_suspend(rdev);
5437 r600_ih_ring_fini(rdev);
5438}
5439
5440/**
5441 * cik_get_ih_wptr - get the IH ring buffer wptr
5442 *
5443 * @rdev: radeon_device pointer
5444 *
5445 * Get the IH ring buffer wptr from either the register
5446 * or the writeback memory buffer (CIK). Also check for
5447 * ring buffer overflow and deal with it.
5448 * Used by cik_irq_process().
5449 * Returns the value of the wptr.
5450 */
5451static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5452{
5453 u32 wptr, tmp;
5454
5455 if (rdev->wb.enabled)
5456 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5457 else
5458 wptr = RREG32(IH_RB_WPTR);
5459
5460 if (wptr & RB_OVERFLOW) {
5461 		/* When a ring buffer overflow happens, start parsing interrupts
5462 		 * from the last vector that was not overwritten (wptr + 16, i.e.
5463 		 * one 16-byte IV entry past wptr). Hopefully this lets us catch up.
5464 		 */
5465 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5466 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5467 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5468 tmp = RREG32(IH_RB_CNTL);
5469 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5470 WREG32(IH_RB_CNTL, tmp);
5471 }
5472 return (wptr & rdev->ih.ptr_mask);
5473}
5474
5475/* CIK IV Ring
5476 * Each IV ring entry is 128 bits:
5477 * [7:0] - interrupt source id
5478 * [31:8] - reserved
5479 * [59:32] - interrupt source data
5480 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04005481 * [71:64] - RINGID
5482 * CP:
5483 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05005484 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5485 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5486 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5487 * PIPE_ID - ME0 0=3D
5488 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04005489 * SDMA:
5490 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5491 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5492 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05005493 * [79:72] - VMID
5494 * [95:80] - PASID
5495 * [127:96] - reserved
5496 */
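/* Illustrative decode of the RINGID byte, matching the handlers in
 * cik_irq_process() below (the XXX notes there indicate the exact
 * bitfield order is still unconfirmed):
 * CP sources (e.g. CP EOP):
 *   me_id    = (ring_id & 0x60) >> 5;
 *   pipe_id  = (ring_id & 0x18) >> 3;
 *   queue_id = (ring_id & 0x7) >> 0;
 * SDMA sources (trap events):
 *   me_id (instance) = (ring_id & 0x3) >> 0;
 *   queue_id         = (ring_id & 0xc) >> 2;
 */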
5497/**
5498 * cik_irq_process - interrupt handler
5499 *
5500 * @rdev: radeon_device pointer
5501 *
5502 * Interrupt handler (CIK).  Walk the IH ring,
5503 * ack interrupts and schedule work to handle
5504 * interrupt events.
5505 * Returns irq process return code.
5506 */
5507int cik_irq_process(struct radeon_device *rdev)
5508{
Alex Deucher2b0781a2013-04-09 14:26:16 -04005509 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5510 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05005511 u32 wptr;
5512 u32 rptr;
5513 u32 src_id, src_data, ring_id;
5514 u8 me_id, pipe_id, queue_id;
5515 u32 ring_index;
5516 bool queue_hotplug = false;
5517 bool queue_reset = false;
Alex Deucher3ec7d112013-06-14 10:42:22 -04005518 u32 addr, status, mc_client;
Alex Deuchera59781b2012-11-09 10:45:57 -05005519
5520 if (!rdev->ih.enabled || rdev->shutdown)
5521 return IRQ_NONE;
5522
5523 wptr = cik_get_ih_wptr(rdev);
5524
5525restart_ih:
5526 /* is somebody else already processing irqs? */
5527 if (atomic_xchg(&rdev->ih.lock, 1))
5528 return IRQ_NONE;
5529
5530 rptr = rdev->ih.rptr;
5531 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5532
5533 /* Order reading of wptr vs. reading of IH ring data */
5534 rmb();
5535
5536 /* display interrupts */
5537 cik_irq_ack(rdev);
5538
5539 while (rptr != wptr) {
5540 /* wptr/rptr are in bytes! */
5541 ring_index = rptr / 4;
5542 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5543 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5544 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05005545
5546 switch (src_id) {
5547 case 1: /* D1 vblank/vline */
5548 switch (src_data) {
5549 case 0: /* D1 vblank */
5550 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5551 if (rdev->irq.crtc_vblank_int[0]) {
5552 drm_handle_vblank(rdev->ddev, 0);
5553 rdev->pm.vblank_sync = true;
5554 wake_up(&rdev->irq.vblank_queue);
5555 }
5556 if (atomic_read(&rdev->irq.pflip[0]))
5557 radeon_crtc_handle_flip(rdev, 0);
5558 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5559 DRM_DEBUG("IH: D1 vblank\n");
5560 }
5561 break;
5562 case 1: /* D1 vline */
5563 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5564 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5565 DRM_DEBUG("IH: D1 vline\n");
5566 }
5567 break;
5568 default:
5569 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5570 break;
5571 }
5572 break;
5573 case 2: /* D2 vblank/vline */
5574 switch (src_data) {
5575 case 0: /* D2 vblank */
5576 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5577 if (rdev->irq.crtc_vblank_int[1]) {
5578 drm_handle_vblank(rdev->ddev, 1);
5579 rdev->pm.vblank_sync = true;
5580 wake_up(&rdev->irq.vblank_queue);
5581 }
5582 if (atomic_read(&rdev->irq.pflip[1]))
5583 radeon_crtc_handle_flip(rdev, 1);
5584 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5585 DRM_DEBUG("IH: D2 vblank\n");
5586 }
5587 break;
5588 case 1: /* D2 vline */
5589 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5590 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5591 DRM_DEBUG("IH: D2 vline\n");
5592 }
5593 break;
5594 default:
5595 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5596 break;
5597 }
5598 break;
5599 case 3: /* D3 vblank/vline */
5600 switch (src_data) {
5601 case 0: /* D3 vblank */
5602 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5603 if (rdev->irq.crtc_vblank_int[2]) {
5604 drm_handle_vblank(rdev->ddev, 2);
5605 rdev->pm.vblank_sync = true;
5606 wake_up(&rdev->irq.vblank_queue);
5607 }
5608 if (atomic_read(&rdev->irq.pflip[2]))
5609 radeon_crtc_handle_flip(rdev, 2);
5610 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5611 DRM_DEBUG("IH: D3 vblank\n");
5612 }
5613 break;
5614 case 1: /* D3 vline */
5615 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5616 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5617 DRM_DEBUG("IH: D3 vline\n");
5618 }
5619 break;
5620 default:
5621 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5622 break;
5623 }
5624 break;
5625 case 4: /* D4 vblank/vline */
5626 switch (src_data) {
5627 case 0: /* D4 vblank */
5628 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5629 if (rdev->irq.crtc_vblank_int[3]) {
5630 drm_handle_vblank(rdev->ddev, 3);
5631 rdev->pm.vblank_sync = true;
5632 wake_up(&rdev->irq.vblank_queue);
5633 }
5634 if (atomic_read(&rdev->irq.pflip[3]))
5635 radeon_crtc_handle_flip(rdev, 3);
5636 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5637 DRM_DEBUG("IH: D4 vblank\n");
5638 }
5639 break;
5640 case 1: /* D4 vline */
5641 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5642 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5643 DRM_DEBUG("IH: D4 vline\n");
5644 }
5645 break;
5646 default:
5647 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5648 break;
5649 }
5650 break;
5651 case 5: /* D5 vblank/vline */
5652 switch (src_data) {
5653 case 0: /* D5 vblank */
5654 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5655 if (rdev->irq.crtc_vblank_int[4]) {
5656 drm_handle_vblank(rdev->ddev, 4);
5657 rdev->pm.vblank_sync = true;
5658 wake_up(&rdev->irq.vblank_queue);
5659 }
5660 if (atomic_read(&rdev->irq.pflip[4]))
5661 radeon_crtc_handle_flip(rdev, 4);
5662 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5663 DRM_DEBUG("IH: D5 vblank\n");
5664 }
5665 break;
5666 case 1: /* D5 vline */
5667 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5668 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5669 DRM_DEBUG("IH: D5 vline\n");
5670 }
5671 break;
5672 default:
5673 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5674 break;
5675 }
5676 break;
5677 case 6: /* D6 vblank/vline */
5678 switch (src_data) {
5679 case 0: /* D6 vblank */
5680 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5681 if (rdev->irq.crtc_vblank_int[5]) {
5682 drm_handle_vblank(rdev->ddev, 5);
5683 rdev->pm.vblank_sync = true;
5684 wake_up(&rdev->irq.vblank_queue);
5685 }
5686 if (atomic_read(&rdev->irq.pflip[5]))
5687 radeon_crtc_handle_flip(rdev, 5);
5688 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5689 DRM_DEBUG("IH: D6 vblank\n");
5690 }
5691 break;
5692 case 1: /* D6 vline */
5693 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5694 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5695 DRM_DEBUG("IH: D6 vline\n");
5696 }
5697 break;
5698 default:
5699 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5700 break;
5701 }
5702 break;
5703 case 42: /* HPD hotplug */
5704 switch (src_data) {
5705 case 0:
5706 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5707 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5708 queue_hotplug = true;
5709 DRM_DEBUG("IH: HPD1\n");
5710 }
5711 break;
5712 case 1:
5713 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5714 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5715 queue_hotplug = true;
5716 DRM_DEBUG("IH: HPD2\n");
5717 }
5718 break;
5719 case 2:
5720 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5721 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5722 queue_hotplug = true;
5723 DRM_DEBUG("IH: HPD3\n");
5724 }
5725 break;
5726 case 3:
5727 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5728 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5729 queue_hotplug = true;
5730 DRM_DEBUG("IH: HPD4\n");
5731 }
5732 break;
5733 case 4:
5734 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5735 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5736 queue_hotplug = true;
5737 DRM_DEBUG("IH: HPD5\n");
5738 }
5739 break;
5740 case 5:
5741 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5742 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5743 queue_hotplug = true;
5744 DRM_DEBUG("IH: HPD6\n");
5745 }
5746 break;
5747 default:
5748 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5749 break;
5750 }
5751 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04005752 case 146:
5753 case 147:
Alex Deucher3ec7d112013-06-14 10:42:22 -04005754 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5755 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5756 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
Alex Deucher9d97c992012-09-06 14:24:48 -04005757 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5758 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04005759 addr);
Alex Deucher9d97c992012-09-06 14:24:48 -04005760 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04005761 status);
5762 cik_vm_decode_fault(rdev, status, addr, mc_client);
Alex Deucher9d97c992012-09-06 14:24:48 -04005763 /* reset addr and status */
5764 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5765 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005766 case 176: /* GFX RB CP_INT */
5767 case 177: /* GFX IB CP_INT */
5768 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5769 break;
5770 case 181: /* CP EOP event */
5771 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005772 /* XXX check the bitfield order! */
5773 me_id = (ring_id & 0x60) >> 5;
5774 pipe_id = (ring_id & 0x18) >> 3;
5775 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005776 switch (me_id) {
5777 case 0:
5778 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5779 break;
5780 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05005781 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04005782 			if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5783 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5784 			if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5785 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05005786 break;
5787 }
5788 break;
5789 case 184: /* CP Privileged reg access */
5790 DRM_ERROR("Illegal register access in command stream\n");
5791 /* XXX check the bitfield order! */
5792 me_id = (ring_id & 0x60) >> 5;
5793 pipe_id = (ring_id & 0x18) >> 3;
5794 queue_id = (ring_id & 0x7) >> 0;
5795 switch (me_id) {
5796 case 0:
5797 /* This results in a full GPU reset, but all we need to do is soft
5798 * reset the CP for gfx
5799 */
5800 queue_reset = true;
5801 break;
5802 case 1:
5803 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005804 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005805 break;
5806 case 2:
5807 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005808 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005809 break;
5810 }
5811 break;
5812 case 185: /* CP Privileged inst */
5813 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005814 /* XXX check the bitfield order! */
5815 me_id = (ring_id & 0x60) >> 5;
5816 pipe_id = (ring_id & 0x18) >> 3;
5817 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005818 switch (me_id) {
5819 case 0:
5820 /* This results in a full GPU reset, but all we need to do is soft
5821 * reset the CP for gfx
5822 */
5823 queue_reset = true;
5824 break;
5825 case 1:
5826 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005827 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005828 break;
5829 case 2:
5830 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005831 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005832 break;
5833 }
5834 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04005835 case 224: /* SDMA trap event */
5836 /* XXX check the bitfield order! */
5837 me_id = (ring_id & 0x3) >> 0;
5838 queue_id = (ring_id & 0xc) >> 2;
5839 DRM_DEBUG("IH: SDMA trap\n");
5840 switch (me_id) {
5841 case 0:
5842 switch (queue_id) {
5843 case 0:
5844 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5845 break;
5846 case 1:
5847 /* XXX compute */
5848 break;
5849 case 2:
5850 /* XXX compute */
5851 break;
5852 }
5853 break;
5854 case 1:
5855 switch (queue_id) {
5856 case 0:
5857 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5858 break;
5859 case 1:
5860 /* XXX compute */
5861 break;
5862 case 2:
5863 /* XXX compute */
5864 break;
5865 }
5866 break;
5867 }
5868 break;
5869 case 241: /* SDMA Privileged inst */
5870 case 247: /* SDMA Privileged inst */
5871 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5872 /* XXX check the bitfield order! */
5873 me_id = (ring_id & 0x3) >> 0;
5874 queue_id = (ring_id & 0xc) >> 2;
5875 switch (me_id) {
5876 case 0:
5877 switch (queue_id) {
5878 case 0:
5879 queue_reset = true;
5880 break;
5881 case 1:
5882 /* XXX compute */
5883 queue_reset = true;
5884 break;
5885 case 2:
5886 /* XXX compute */
5887 queue_reset = true;
5888 break;
5889 }
5890 break;
5891 case 1:
5892 switch (queue_id) {
5893 case 0:
5894 queue_reset = true;
5895 break;
5896 case 1:
5897 /* XXX compute */
5898 queue_reset = true;
5899 break;
5900 case 2:
5901 /* XXX compute */
5902 queue_reset = true;
5903 break;
5904 }
5905 break;
5906 }
5907 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005908 case 233: /* GUI IDLE */
5909 DRM_DEBUG("IH: GUI idle\n");
5910 break;
5911 default:
5912 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5913 break;
5914 }
5915
5916 /* wptr/rptr are in bytes! */
5917 rptr += 16;
5918 rptr &= rdev->ih.ptr_mask;
5919 }
5920 if (queue_hotplug)
5921 schedule_work(&rdev->hotplug_work);
5922 if (queue_reset)
5923 schedule_work(&rdev->reset_work);
5924 rdev->ih.rptr = rptr;
5925 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5926 atomic_set(&rdev->ih.lock, 0);
5927
5928 /* make sure wptr hasn't changed while processing */
5929 wptr = cik_get_ih_wptr(rdev);
5930 if (wptr != rptr)
5931 goto restart_ih;
5932
5933 return IRQ_HANDLED;
5934}
Alex Deucher7bf94a22012-08-17 11:48:29 -04005935
5936/*
5937 * startup/shutdown callbacks
5938 */
5939/**
5940 * cik_startup - program the asic to a functional state
5941 *
5942 * @rdev: radeon_device pointer
5943 *
5944 * Programs the asic to a functional state (CIK).
5945 * Called by cik_init() and cik_resume().
5946 * Returns 0 for success, error for failure.
5947 */
5948static int cik_startup(struct radeon_device *rdev)
5949{
5950 struct radeon_ring *ring;
5951 int r;
5952
Alex Deucher8a7cd272013-08-06 11:29:39 -04005953 /* enable pcie gen2/3 link */
5954 cik_pcie_gen3_enable(rdev);
5955
Alex Deucher6fab3feb2013-08-04 12:13:17 -04005956 cik_mc_program(rdev);
5957
Alex Deucher7bf94a22012-08-17 11:48:29 -04005958 if (rdev->flags & RADEON_IS_IGP) {
5959 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5960 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5961 r = cik_init_microcode(rdev);
5962 if (r) {
5963 DRM_ERROR("Failed to load firmware!\n");
5964 return r;
5965 }
5966 }
5967 } else {
5968 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5969 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5970 !rdev->mc_fw) {
5971 r = cik_init_microcode(rdev);
5972 if (r) {
5973 DRM_ERROR("Failed to load firmware!\n");
5974 return r;
5975 }
5976 }
5977
5978 r = ci_mc_load_microcode(rdev);
5979 if (r) {
5980 DRM_ERROR("Failed to load MC firmware!\n");
5981 return r;
5982 }
5983 }
5984
5985 r = r600_vram_scratch_init(rdev);
5986 if (r)
5987 return r;
5988
Alex Deucher7bf94a22012-08-17 11:48:29 -04005989 r = cik_pcie_gart_enable(rdev);
5990 if (r)
5991 return r;
5992 cik_gpu_init(rdev);
5993
5994 /* allocate rlc buffers */
5995 r = si_rlc_init(rdev);
5996 if (r) {
5997 DRM_ERROR("Failed to init rlc BOs!\n");
5998 return r;
5999 }
6000
6001 /* allocate wb buffer */
6002 r = radeon_wb_init(rdev);
6003 if (r)
6004 return r;
6005
Alex Deucher963e81f2013-06-26 17:37:11 -04006006 /* allocate mec buffers */
6007 r = cik_mec_init(rdev);
6008 if (r) {
6009 DRM_ERROR("Failed to init MEC BOs!\n");
6010 return r;
6011 }
6012
Alex Deucher7bf94a22012-08-17 11:48:29 -04006013 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6014 if (r) {
6015 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6016 return r;
6017 }
6018
Alex Deucher963e81f2013-06-26 17:37:11 -04006019 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6020 if (r) {
6021 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6022 return r;
6023 }
6024
6025 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6026 if (r) {
6027 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6028 return r;
6029 }
6030
Alex Deucher7bf94a22012-08-17 11:48:29 -04006031 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6032 if (r) {
6033 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6034 return r;
6035 }
6036
6037 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6038 if (r) {
6039 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6040 return r;
6041 }
6042
Christian König87167bb2013-04-09 13:39:21 -04006043 r = cik_uvd_resume(rdev);
6044 if (!r) {
6045 r = radeon_fence_driver_start_ring(rdev,
6046 R600_RING_TYPE_UVD_INDEX);
6047 if (r)
6048 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6049 }
6050 if (r)
6051 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6052
Alex Deucher7bf94a22012-08-17 11:48:29 -04006053 /* Enable IRQ */
6054 if (!rdev->irq.installed) {
6055 r = radeon_irq_kms_init(rdev);
6056 if (r)
6057 return r;
6058 }
6059
6060 r = cik_irq_init(rdev);
6061 if (r) {
6062 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6063 radeon_irq_kms_fini(rdev);
6064 return r;
6065 }
6066 cik_irq_set(rdev);
6067
6068 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6069 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6070 CP_RB0_RPTR, CP_RB0_WPTR,
6071 0, 0xfffff, RADEON_CP_PACKET2);
6072 if (r)
6073 return r;
6074
Alex Deucher963e81f2013-06-26 17:37:11 -04006075 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04006076 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006077 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6078 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6079 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006080 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006081 if (r)
6082 return r;
6083 ring->me = 1; /* first MEC */
6084 ring->pipe = 0; /* first pipe */
6085 ring->queue = 0; /* first queue */
6086 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6087
Alex Deucher2615b532013-06-03 11:21:58 -04006088 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006089 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6090 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6091 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006092 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006093 if (r)
6094 return r;
6095 	/* dGPUs only have 1 MEC */
6096 ring->me = 1; /* first MEC */
6097 ring->pipe = 0; /* first pipe */
6098 ring->queue = 1; /* second queue */
6099 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6100
Alex Deucher7bf94a22012-08-17 11:48:29 -04006101 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6102 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6103 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6104 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6105 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6106 if (r)
6107 return r;
6108
6109 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6110 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6111 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6112 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6113 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6114 if (r)
6115 return r;
6116
6117 r = cik_cp_resume(rdev);
6118 if (r)
6119 return r;
6120
6121 r = cik_sdma_resume(rdev);
6122 if (r)
6123 return r;
6124
Christian König87167bb2013-04-09 13:39:21 -04006125 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6126 if (ring->ring_size) {
6127 r = radeon_ring_init(rdev, ring, ring->ring_size,
6128 R600_WB_UVD_RPTR_OFFSET,
6129 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6130 0, 0xfffff, RADEON_CP_PACKET2);
6131 if (!r)
6132 r = r600_uvd_init(rdev);
6133 if (r)
6134 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6135 }
6136
Alex Deucher7bf94a22012-08-17 11:48:29 -04006137 r = radeon_ib_pool_init(rdev);
6138 if (r) {
6139 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6140 return r;
6141 }
6142
6143 r = radeon_vm_manager_init(rdev);
6144 if (r) {
6145 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6146 return r;
6147 }
6148
6149 return 0;
6150}
6151
6152/**
6153 * cik_resume - resume the asic to a functional state
6154 *
6155 * @rdev: radeon_device pointer
6156 *
6157 * Programs the asic to a functional state (CIK).
6158 * Called at resume.
6159 * Returns 0 for success, error for failure.
6160 */
6161int cik_resume(struct radeon_device *rdev)
6162{
6163 int r;
6164
6165 /* post card */
6166 atom_asic_init(rdev->mode_info.atom_context);
6167
Alex Deucher0aafd312013-04-09 14:43:30 -04006168 /* init golden registers */
6169 cik_init_golden_registers(rdev);
6170
Alex Deucher7bf94a22012-08-17 11:48:29 -04006171 rdev->accel_working = true;
6172 r = cik_startup(rdev);
6173 if (r) {
6174 DRM_ERROR("cik startup failed on resume\n");
6175 rdev->accel_working = false;
6176 return r;
6177 }
6178
6179 return r;
6180
6181}
6182
6183/**
6184 * cik_suspend - suspend the asic
6185 *
6186 * @rdev: radeon_device pointer
6187 *
6188 * Bring the chip into a state suitable for suspend (CIK).
6189 * Called at suspend.
6190 * Returns 0 for success.
6191 */
6192int cik_suspend(struct radeon_device *rdev)
6193{
6194 radeon_vm_manager_fini(rdev);
6195 cik_cp_enable(rdev, false);
6196 cik_sdma_enable(rdev, false);
Christian König2858c002013-08-01 17:34:07 +02006197 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006198 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006199 cik_irq_suspend(rdev);
6200 radeon_wb_disable(rdev);
6201 cik_pcie_gart_disable(rdev);
6202 return 0;
6203}
6204
6205/* The plan is to move initialization into this function and use
6206 * helper functions so that radeon_device_init does little more
6207 * than call asic specific functions. This should also allow
6208 * removing a bunch of callback functions
6209 * like vram_info.
6210 */
6211/**
6212 * cik_init - asic specific driver and hw init
6213 *
6214 * @rdev: radeon_device pointer
6215 *
6216 * Setup asic specific driver variables and program the hw
6217 * to a functional state (CIK).
6218 * Called at driver startup.
6219 * Returns 0 for success, errors for failure.
6220 */
6221int cik_init(struct radeon_device *rdev)
6222{
6223 struct radeon_ring *ring;
6224 int r;
6225
6226 /* Read BIOS */
6227 if (!radeon_get_bios(rdev)) {
6228 if (ASIC_IS_AVIVO(rdev))
6229 return -EINVAL;
6230 }
6231 /* Must be an ATOMBIOS */
6232 if (!rdev->is_atom_bios) {
6233 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6234 return -EINVAL;
6235 }
6236 r = radeon_atombios_init(rdev);
6237 if (r)
6238 return r;
6239
6240 /* Post card if necessary */
6241 if (!radeon_card_posted(rdev)) {
6242 if (!rdev->bios) {
6243 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6244 return -EINVAL;
6245 }
6246 DRM_INFO("GPU not posted. posting now...\n");
6247 atom_asic_init(rdev->mode_info.atom_context);
6248 }
Alex Deucher0aafd312013-04-09 14:43:30 -04006249 /* init golden registers */
6250 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006251 /* Initialize scratch registers */
6252 cik_scratch_init(rdev);
6253 /* Initialize surface registers */
6254 radeon_surface_init(rdev);
6255 /* Initialize clocks */
6256 radeon_get_clock_info(rdev->ddev);
6257
6258 /* Fence driver */
6259 r = radeon_fence_driver_init(rdev);
6260 if (r)
6261 return r;
6262
6263 /* initialize memory controller */
6264 r = cik_mc_init(rdev);
6265 if (r)
6266 return r;
6267 /* Memory manager */
6268 r = radeon_bo_init(rdev);
6269 if (r)
6270 return r;
6271
6272 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6273 ring->ring_obj = NULL;
6274 r600_ring_init(rdev, ring, 1024 * 1024);
6275
Alex Deucher963e81f2013-06-26 17:37:11 -04006276 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6277 ring->ring_obj = NULL;
6278 r600_ring_init(rdev, ring, 1024 * 1024);
6279 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6280 if (r)
6281 return r;
6282
6283 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6284 ring->ring_obj = NULL;
6285 r600_ring_init(rdev, ring, 1024 * 1024);
6286 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6287 if (r)
6288 return r;
6289
Alex Deucher7bf94a22012-08-17 11:48:29 -04006290 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6291 ring->ring_obj = NULL;
6292 r600_ring_init(rdev, ring, 256 * 1024);
6293
6294 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6295 ring->ring_obj = NULL;
6296 r600_ring_init(rdev, ring, 256 * 1024);
6297
Christian König87167bb2013-04-09 13:39:21 -04006298 r = radeon_uvd_init(rdev);
6299 if (!r) {
6300 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6301 ring->ring_obj = NULL;
6302 r600_ring_init(rdev, ring, 4096);
6303 }
6304
Alex Deucher7bf94a22012-08-17 11:48:29 -04006305 rdev->ih.ring_obj = NULL;
6306 r600_ih_ring_init(rdev, 64 * 1024);
6307
6308 r = r600_pcie_gart_init(rdev);
6309 if (r)
6310 return r;
6311
6312 rdev->accel_working = true;
6313 r = cik_startup(rdev);
6314 if (r) {
6315 dev_err(rdev->dev, "disabling GPU acceleration\n");
6316 cik_cp_fini(rdev);
6317 cik_sdma_fini(rdev);
6318 cik_irq_fini(rdev);
6319 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006320 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006321 radeon_wb_fini(rdev);
6322 radeon_ib_pool_fini(rdev);
6323 radeon_vm_manager_fini(rdev);
6324 radeon_irq_kms_fini(rdev);
6325 cik_pcie_gart_fini(rdev);
6326 rdev->accel_working = false;
6327 }
6328
6329 /* Don't start up if the MC ucode is missing.
6330 * The default clocks and voltages before the MC ucode
6331 	 * is loaded are not sufficient for advanced operations.
6332 */
6333 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6334 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6335 return -EINVAL;
6336 }
6337
6338 return 0;
6339}
6340
6341/**
6342 * cik_fini - asic specific driver and hw fini
6343 *
6344 * @rdev: radeon_device pointer
6345 *
6346 * Tear down the asic specific driver variables and program the hw
6347 * to an idle state (CIK).
6348 * Called at driver unload.
6349 */
6350void cik_fini(struct radeon_device *rdev)
6351{
6352 cik_cp_fini(rdev);
6353 cik_sdma_fini(rdev);
6354 cik_irq_fini(rdev);
6355 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006356 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006357 radeon_wb_fini(rdev);
6358 radeon_vm_manager_fini(rdev);
6359 radeon_ib_pool_fini(rdev);
6360 radeon_irq_kms_fini(rdev);
Christian König2858c002013-08-01 17:34:07 +02006361 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006362 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006363 cik_pcie_gart_fini(rdev);
6364 r600_vram_scratch_fini(rdev);
6365 radeon_gem_fini(rdev);
6366 radeon_fence_driver_fini(rdev);
6367 radeon_bo_fini(rdev);
6368 radeon_atombios_fini(rdev);
6369 kfree(rdev->bios);
6370 rdev->bios = NULL;
6371}
Alex Deuchercd84a272012-07-20 17:13:13 -04006372
6373/* display watermark setup */
6374/**
6375 * dce8_line_buffer_adjust - Set up the line buffer
6376 *
6377 * @rdev: radeon_device pointer
6378 * @radeon_crtc: the selected display controller
6379 * @mode: the current display mode on the selected display
6380 * controller
6381 *
6382 * Set up the line buffer allocation for
6383 * the selected display controller (CIK).
6384 * Returns the line buffer size in pixels.
6385 */
6386static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6387 struct radeon_crtc *radeon_crtc,
6388 struct drm_display_mode *mode)
6389{
6390 u32 tmp;
6391
6392 /*
6393 * Line Buffer Setup
6394 	 * There are 6 line buffers, one for each display controller.
6395 * There are 3 partitions per LB. Select the number of partitions
6396 * to enable based on the display width. For display widths larger
6397 	 * than 4096, you need to use 2 display controllers and combine
6398 * them using the stereo blender.
6399 */
6400 if (radeon_crtc->base.enabled && mode) {
6401 if (mode->crtc_hdisplay < 1920)
6402 tmp = 1;
6403 else if (mode->crtc_hdisplay < 2560)
6404 tmp = 2;
6405 else if (mode->crtc_hdisplay < 4096)
6406 tmp = 0;
6407 else {
6408 DRM_DEBUG_KMS("Mode too big for LB!\n");
6409 tmp = 0;
6410 }
6411 } else
6412 tmp = 1;
6413
6414 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6415 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6416
6417 if (radeon_crtc->base.enabled && mode) {
6418 switch (tmp) {
6419 case 0:
6420 default:
6421 return 4096 * 2;
6422 case 1:
6423 return 1920 * 2;
6424 case 2:
6425 return 2560 * 2;
6426 }
6427 }
6428
6429 /* controller not enabled, so no lb used */
6430 return 0;
6431}
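/* Illustrative example of the mapping above: a 1920 pixel wide mode
 * selects LB_MEMORY_CONFIG(2) and the function reports 2560 * 2 pixels
 * of line buffer, modes narrower than 1920 get config 1 (1920 * 2),
 * widths of 2560 and above get config 0 (4096 * 2), and a disabled
 * controller keeps config 1 but reports 0.
 */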
6432
6433/**
6434 * cik_get_number_of_dram_channels - get the number of dram channels
6435 *
6436 * @rdev: radeon_device pointer
6437 *
6438 * Look up the number of video ram channels (CIK).
6439 * Used for display watermark bandwidth calculations
6440 * Returns the number of dram channels
6441 */
6442static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6443{
6444 u32 tmp = RREG32(MC_SHARED_CHMAP);
6445
6446 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6447 case 0:
6448 default:
6449 return 1;
6450 case 1:
6451 return 2;
6452 case 2:
6453 return 4;
6454 case 3:
6455 return 8;
6456 case 4:
6457 return 3;
6458 case 5:
6459 return 6;
6460 case 6:
6461 return 10;
6462 case 7:
6463 return 12;
6464 case 8:
6465 return 16;
6466 }
6467}
6468
6469struct dce8_wm_params {
6470 u32 dram_channels; /* number of dram channels */
6471 u32 yclk; /* bandwidth per dram data pin in kHz */
6472 u32 sclk; /* engine clock in kHz */
6473 u32 disp_clk; /* display clock in kHz */
6474 u32 src_width; /* viewport width */
6475 u32 active_time; /* active display time in ns */
6476 u32 blank_time; /* blank time in ns */
6477 bool interlaced; /* mode is interlaced */
6478 fixed20_12 vsc; /* vertical scale ratio */
6479 u32 num_heads; /* number of active crtcs */
6480 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6481 u32 lb_size; /* line buffer allocated to pipe */
6482 u32 vtaps; /* vertical scaler taps */
6483};
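/* One dce8_wm_params is filled in per active head by
 * dce8_program_watermarks() below, using the dpm (or legacy pm) memory
 * and engine clocks and the crtc mode timings.
 */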
6484
6485/**
6486 * dce8_dram_bandwidth - get the dram bandwidth
6487 *
6488 * @wm: watermark calculation data
6489 *
6490 * Calculate the raw dram bandwidth (CIK).
6491 * Used for display watermark bandwidth calculations
6492 * Returns the dram bandwidth in MBytes/s
6493 */
6494static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6495{
6496 /* Calculate raw DRAM Bandwidth */
6497 fixed20_12 dram_efficiency; /* 0.7 */
6498 fixed20_12 yclk, dram_channels, bandwidth;
6499 fixed20_12 a;
6500
6501 a.full = dfixed_const(1000);
6502 yclk.full = dfixed_const(wm->yclk);
6503 yclk.full = dfixed_div(yclk, a);
6504 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6505 a.full = dfixed_const(10);
6506 dram_efficiency.full = dfixed_const(7);
6507 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6508 bandwidth.full = dfixed_mul(dram_channels, yclk);
6509 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6510
6511 return dfixed_trunc(bandwidth);
6512}
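/* Rough example of the formula above (values illustrative only): with
 * yclk = 1000000 kHz (1 GHz effective) and 4 dram channels, the raw
 * bandwidth is (1000000 / 1000) * (4 * 4) * 0.7 = 11200 MBytes/s.
 */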
6513
6514/**
6515 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6516 *
6517 * @wm: watermark calculation data
6518 *
6519 * Calculate the dram bandwidth used for display (CIK).
6520 * Used for display watermark bandwidth calculations
6521 * Returns the dram bandwidth for display in MBytes/s
6522 */
6523static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6524{
6525 /* Calculate DRAM Bandwidth and the part allocated to display. */
6526 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6527 fixed20_12 yclk, dram_channels, bandwidth;
6528 fixed20_12 a;
6529
6530 a.full = dfixed_const(1000);
6531 yclk.full = dfixed_const(wm->yclk);
6532 yclk.full = dfixed_div(yclk, a);
6533 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6534 a.full = dfixed_const(10);
6535 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6536 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6537 bandwidth.full = dfixed_mul(dram_channels, yclk);
6538 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6539
6540 return dfixed_trunc(bandwidth);
6541}
6542
6543/**
6544 * dce8_data_return_bandwidth - get the data return bandwidth
6545 *
6546 * @wm: watermark calculation data
6547 *
6548 * Calculate the data return bandwidth used for display (CIK).
6549 * Used for display watermark bandwidth calculations
6550 * Returns the data return bandwidth in MBytes/s
6551 */
6552static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6553{
6554 /* Calculate the display Data return Bandwidth */
6555 fixed20_12 return_efficiency; /* 0.8 */
6556 fixed20_12 sclk, bandwidth;
6557 fixed20_12 a;
6558
6559 a.full = dfixed_const(1000);
6560 sclk.full = dfixed_const(wm->sclk);
6561 sclk.full = dfixed_div(sclk, a);
6562 a.full = dfixed_const(10);
6563 return_efficiency.full = dfixed_const(8);
6564 return_efficiency.full = dfixed_div(return_efficiency, a);
6565 a.full = dfixed_const(32);
6566 bandwidth.full = dfixed_mul(a, sclk);
6567 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6568
6569 return dfixed_trunc(bandwidth);
6570}
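/* Rough example of the formula above (values illustrative only): with
 * sclk = 800000 kHz the display data return path provides
 * (800000 / 1000) * 32 * 0.8 = 20480 MBytes/s.
 */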
6571
6572/**
6573 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6574 *
6575 * @wm: watermark calculation data
6576 *
6577 * Calculate the dmif bandwidth used for display (CIK).
6578 * Used for display watermark bandwidth calculations
6579 * Returns the dmif bandwidth in MBytes/s
6580 */
6581static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6582{
6583 /* Calculate the DMIF Request Bandwidth */
6584 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6585 fixed20_12 disp_clk, bandwidth;
6586 fixed20_12 a, b;
6587
6588 a.full = dfixed_const(1000);
6589 disp_clk.full = dfixed_const(wm->disp_clk);
6590 disp_clk.full = dfixed_div(disp_clk, a);
6591 a.full = dfixed_const(32);
6592 b.full = dfixed_mul(a, disp_clk);
6593
6594 a.full = dfixed_const(10);
6595 disp_clk_request_efficiency.full = dfixed_const(8);
6596 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6597
6598 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6599
6600 return dfixed_trunc(bandwidth);
6601}
6602
6603/**
6604 * dce8_available_bandwidth - get the min available bandwidth
6605 *
6606 * @wm: watermark calculation data
6607 *
6608 * Calculate the min available bandwidth used for display (CIK).
6609 * Used for display watermark bandwidth calculations
6610 * Returns the min available bandwidth in MBytes/s
6611 */
6612static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6613{
6614 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6615 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6616 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6617 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6618
6619 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6620}
6621
6622/**
6623 * dce8_average_bandwidth - get the average available bandwidth
6624 *
6625 * @wm: watermark calculation data
6626 *
6627 * Calculate the average available bandwidth used for display (CIK).
6628 * Used for display watermark bandwidth calculations
6629 * Returns the average available bandwidth in MBytes/s
6630 */
6631static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6632{
6633 /* Calculate the display mode Average Bandwidth
6634 * DisplayMode should contain the source and destination dimensions,
6635 * timing, etc.
6636 */
6637 fixed20_12 bpp;
6638 fixed20_12 line_time;
6639 fixed20_12 src_width;
6640 fixed20_12 bandwidth;
6641 fixed20_12 a;
6642
6643 a.full = dfixed_const(1000);
6644 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6645 line_time.full = dfixed_div(line_time, a);
6646 bpp.full = dfixed_const(wm->bytes_per_pixel);
6647 src_width.full = dfixed_const(wm->src_width);
6648 bandwidth.full = dfixed_mul(src_width, bpp);
6649 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6650 bandwidth.full = dfixed_div(bandwidth, line_time);
6651
6652 return dfixed_trunc(bandwidth);
6653}
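/* Rough example of the formula above (values illustrative only): a
 * 1920 pixel wide source at 4 bytes per pixel, vsc = 1 and a line time
 * (active + blank) of 14800 ns needs about 1920 * 4 / 14.8 ~= 518
 * MBytes/s of average bandwidth.
 */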
6654
6655/**
6656 * dce8_latency_watermark - get the latency watermark
6657 *
6658 * @wm: watermark calculation data
6659 *
6660 * Calculate the latency watermark (CIK).
6661 * Used for display watermark bandwidth calculations
6662 * Returns the latency watermark in ns
6663 */
6664static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6665{
6666 /* First calculate the latency in ns */
6667 u32 mc_latency = 2000; /* 2000 ns. */
6668 u32 available_bandwidth = dce8_available_bandwidth(wm);
6669 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6670 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6671 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6672 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6673 (wm->num_heads * cursor_line_pair_return_time);
6674 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6675 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6676 u32 tmp, dmif_size = 12288;
6677 fixed20_12 a, b, c;
6678
6679 if (wm->num_heads == 0)
6680 return 0;
6681
6682 a.full = dfixed_const(2);
6683 b.full = dfixed_const(1);
6684 if ((wm->vsc.full > a.full) ||
6685 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6686 (wm->vtaps >= 5) ||
6687 ((wm->vsc.full >= a.full) && wm->interlaced))
6688 max_src_lines_per_dst_line = 4;
6689 else
6690 max_src_lines_per_dst_line = 2;
6691
6692 a.full = dfixed_const(available_bandwidth);
6693 b.full = dfixed_const(wm->num_heads);
6694 a.full = dfixed_div(a, b);
6695
6696 b.full = dfixed_const(mc_latency + 512);
6697 c.full = dfixed_const(wm->disp_clk);
6698 b.full = dfixed_div(b, c);
6699
6700 c.full = dfixed_const(dmif_size);
6701 b.full = dfixed_div(c, b);
6702
6703 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6704
6705 b.full = dfixed_const(1000);
6706 c.full = dfixed_const(wm->disp_clk);
6707 b.full = dfixed_div(c, b);
6708 c.full = dfixed_const(wm->bytes_per_pixel);
6709 b.full = dfixed_mul(b, c);
6710
6711 lb_fill_bw = min(tmp, dfixed_trunc(b));
6712
6713 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6714 b.full = dfixed_const(1000);
6715 c.full = dfixed_const(lb_fill_bw);
6716 b.full = dfixed_div(c, b);
6717 a.full = dfixed_div(a, b);
6718 line_fill_time = dfixed_trunc(a);
6719
6720 if (line_fill_time < wm->active_time)
6721 return latency;
6722 else
6723 return latency + (line_fill_time - wm->active_time);
6724
6725}
6726
6727/**
6728 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6729 * average and available dram bandwidth
6730 *
6731 * @wm: watermark calculation data
6732 *
6733 * Check if the display average bandwidth fits in the display
6734 * dram bandwidth (CIK).
6735 * Used for display watermark bandwidth calculations
6736 * Returns true if the display fits, false if not.
6737 */
6738static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6739{
6740 if (dce8_average_bandwidth(wm) <=
6741 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6742 return true;
6743 else
6744 return false;
6745}
6746
6747/**
6748 * dce8_average_bandwidth_vs_available_bandwidth - check
6749 * average and available bandwidth
6750 *
6751 * @wm: watermark calculation data
6752 *
6753 * Check if the display average bandwidth fits in the display
6754 * available bandwidth (CIK).
6755 * Used for display watermark bandwidth calculations
6756 * Returns true if the display fits, false if not.
6757 */
6758static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6759{
6760 if (dce8_average_bandwidth(wm) <=
6761 (dce8_available_bandwidth(wm) / wm->num_heads))
6762 return true;
6763 else
6764 return false;
6765}
6766
6767/**
6768 * dce8_check_latency_hiding - check latency hiding
6769 *
6770 * @wm: watermark calculation data
6771 *
6772 * Check latency hiding (CIK).
6773 * Used for display watermark bandwidth calculations
6774 * Used for display watermark bandwidth calculations.
6775 */
6776static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6777{
6778 u32 lb_partitions = wm->lb_size / wm->src_width;
6779 u32 line_time = wm->active_time + wm->blank_time;
6780 u32 latency_tolerant_lines;
6781 u32 latency_hiding;
6782 fixed20_12 a;
6783
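	/*
	 * Roughly: only one extra line of latency can be tolerated when scaling
	 * down (vsc > 1) or when the line buffer holds no more lines than the
	 * vertical filter needs; otherwise two lines fit.
	 */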
6784 a.full = dfixed_const(1);
6785 if (wm->vsc.full > a.full)
6786 latency_tolerant_lines = 1;
6787 else {
6788 if (lb_partitions <= (wm->vtaps + 1))
6789 latency_tolerant_lines = 1;
6790 else
6791 latency_tolerant_lines = 2;
6792 }
6793
6794 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6795
6796 if (dce8_latency_watermark(wm) <= latency_hiding)
6797 return true;
6798 else
6799 return false;
6800}
6801
6802/**
6803 * dce8_program_watermarks - program display watermarks
6804 *
6805 * @rdev: radeon_device pointer
6806 * @radeon_crtc: the selected display controller
6807 * @lb_size: line buffer size
6808 * @num_heads: number of display controllers in use
6809 *
6810 * Calculate and program the display watermarks for the
6811 * selected display controller (CIK).
6812 */
6813static void dce8_program_watermarks(struct radeon_device *rdev,
6814 struct radeon_crtc *radeon_crtc,
6815 u32 lb_size, u32 num_heads)
6816{
6817 struct drm_display_mode *mode = &radeon_crtc->base.mode;
Alex Deucher58ea2de2013-01-24 10:03:39 -05006818 struct dce8_wm_params wm_low, wm_high;
Alex Deuchercd84a272012-07-20 17:13:13 -04006819 u32 pixel_period;
6820 u32 line_time = 0;
6821 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6822 u32 tmp, wm_mask;
6823
6824 if (radeon_crtc->base.enabled && num_heads && mode) {
6825 pixel_period = 1000000 / (u32)mode->clock;
6826 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6827
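		/*
		 * Two watermark sets are computed: wm A for the highest DPM
		 * clocks and wm B for the lowest, so valid watermarks exist
		 * whichever power state is selected.
		 */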
Alex Deucher58ea2de2013-01-24 10:03:39 -05006828 /* watermark for high clocks */
6829 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
6830 rdev->pm.dpm_enabled) {
6831 wm_high.yclk =
6832 radeon_dpm_get_mclk(rdev, false) * 10;
6833 wm_high.sclk =
6834 radeon_dpm_get_sclk(rdev, false) * 10;
6835 } else {
6836 wm_high.yclk = rdev->pm.current_mclk * 10;
6837 wm_high.sclk = rdev->pm.current_sclk * 10;
6838 }
6839
6840 wm_high.disp_clk = mode->clock;
6841 wm_high.src_width = mode->crtc_hdisplay;
6842 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
6843 wm_high.blank_time = line_time - wm_high.active_time;
6844 wm_high.interlaced = false;
Alex Deuchercd84a272012-07-20 17:13:13 -04006845 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
Alex Deucher58ea2de2013-01-24 10:03:39 -05006846 wm_high.interlaced = true;
6847 wm_high.vsc = radeon_crtc->vsc;
6848 wm_high.vtaps = 1;
Alex Deuchercd84a272012-07-20 17:13:13 -04006849 if (radeon_crtc->rmx_type != RMX_OFF)
Alex Deucher58ea2de2013-01-24 10:03:39 -05006850 wm_high.vtaps = 2;
6851 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
6852 wm_high.lb_size = lb_size;
6853 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
6854 wm_high.num_heads = num_heads;
Alex Deuchercd84a272012-07-20 17:13:13 -04006855
6856 /* set for high clocks */
Alex Deucher58ea2de2013-01-24 10:03:39 -05006857 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
Alex Deuchercd84a272012-07-20 17:13:13 -04006858
6859 /* possibly force display priority to high */
6860 /* should really do this at mode validation time... */
Alex Deucher58ea2de2013-01-24 10:03:39 -05006861 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
6862 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
6863 !dce8_check_latency_hiding(&wm_high) ||
6864 (rdev->disp_priority == 2)) {
6865 DRM_DEBUG_KMS("force priority to high\n");
6866 }
6867
6868 /* watermark for low clocks */
6869 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
6870 rdev->pm.dpm_enabled) {
6871 wm_low.yclk =
6872 radeon_dpm_get_mclk(rdev, true) * 10;
6873 wm_low.sclk =
6874 radeon_dpm_get_sclk(rdev, true) * 10;
6875 } else {
6876 wm_low.yclk = rdev->pm.current_mclk * 10;
6877 wm_low.sclk = rdev->pm.current_sclk * 10;
6878 }
6879
6880 wm_low.disp_clk = mode->clock;
6881 wm_low.src_width = mode->crtc_hdisplay;
6882 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
6883 wm_low.blank_time = line_time - wm_low.active_time;
6884 wm_low.interlaced = false;
6885 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6886 wm_low.interlaced = true;
6887 wm_low.vsc = radeon_crtc->vsc;
6888 wm_low.vtaps = 1;
6889 if (radeon_crtc->rmx_type != RMX_OFF)
6890 wm_low.vtaps = 2;
6891 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
6892 wm_low.lb_size = lb_size;
6893 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
6894 wm_low.num_heads = num_heads;
6895
6896 /* set for low clocks */
6897 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
6898
6899 /* possibly force display priority to high */
6900 /* should really do this at mode validation time... */
6901 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
6902 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
6903 !dce8_check_latency_hiding(&wm_low) ||
Alex Deuchercd84a272012-07-20 17:13:13 -04006904 (rdev->disp_priority == 2)) {
6905 DRM_DEBUG_KMS("force priority to high\n");
6906 }
6907 }
6908
6909 /* select wm A */
6910 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6911 tmp = wm_mask;
6912 tmp &= ~LATENCY_WATERMARK_MASK(3);
6913 tmp |= LATENCY_WATERMARK_MASK(1);
6914 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6915 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6916 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6917 LATENCY_HIGH_WATERMARK(line_time)));
6918 /* select wm B */
6919 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6920 tmp &= ~LATENCY_WATERMARK_MASK(3);
6921 tmp |= LATENCY_WATERMARK_MASK(2);
6922 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6923 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6924 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6925 LATENCY_HIGH_WATERMARK(line_time)));
6926 /* restore original selection */
6927 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
Alex Deucher58ea2de2013-01-24 10:03:39 -05006928
6929 /* save values for DPM */
6930 radeon_crtc->line_time = line_time;
6931 radeon_crtc->wm_high = latency_watermark_a;
6932 radeon_crtc->wm_low = latency_watermark_b;
Alex Deuchercd84a272012-07-20 17:13:13 -04006933}
6934
6935/**
6936 * dce8_bandwidth_update - program display watermarks
6937 *
6938 * @rdev: radeon_device pointer
6939 *
6940 * Calculate and program the display watermarks and line
6941 * buffer allocation (CIK).
6942 */
6943void dce8_bandwidth_update(struct radeon_device *rdev)
6944{
6945 struct drm_display_mode *mode = NULL;
6946 u32 num_heads = 0, lb_size;
6947 int i;
6948
6949 radeon_update_display_priority(rdev);
6950
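	/*
	 * Count the enabled heads first; each head's watermarks depend on how
	 * many displays share the memory bandwidth.
	 */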
6951 for (i = 0; i < rdev->num_crtc; i++) {
6952 if (rdev->mode_info.crtcs[i]->base.enabled)
6953 num_heads++;
6954 }
6955 for (i = 0; i < rdev->num_crtc; i++) {
6956 mode = &rdev->mode_info.crtcs[i]->base.mode;
6957 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6958 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6959 }
6960}
Alex Deucher44fa3462012-12-18 22:17:00 -05006961
6962/**
6963 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6964 *
6965 * @rdev: radeon_device pointer
6966 *
6967 * Fetches a GPU clock counter snapshot (CIK).
6968 * Returns the 64 bit clock counter snapshot.
6969 */
6970uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6971{
6972 uint64_t clock;
6973
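	/*
	 * Capture the counter first so the LSB/MSB reads below form one
	 * consistent 64-bit sample; the mutex serializes concurrent callers.
	 */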
6974 mutex_lock(&rdev->gpu_clock_mutex);
6975 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6976 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6977 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6978 mutex_unlock(&rdev->gpu_clock_mutex);
6979 return clock;
6980}
6981
Christian König87167bb2013-04-09 13:39:21 -04006982static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6983 u32 cntl_reg, u32 status_reg)
6984{
6985 int r, i;
6986 struct atom_clock_dividers dividers;
6987 uint32_t tmp;
6988
6989 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6990 clock, false, &dividers);
6991 if (r)
6992 return r;
6993
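	/*
	 * Program the post divider returned by the ATOM tables, clearing the
	 * direct-control enable and the old divider value first.
	 */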
6994 tmp = RREG32_SMC(cntl_reg);
6995 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6996 tmp |= dividers.post_divider;
6997 WREG32_SMC(cntl_reg, tmp);
6998
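	/* wait up to ~1 second (100 * 10 ms) for the new clock setting to take effect */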
6999 for (i = 0; i < 100; i++) {
7000 if (RREG32_SMC(status_reg) & DCLK_STATUS)
7001 break;
7002 mdelay(10);
7003 }
7004 if (i == 100)
7005 return -ETIMEDOUT;
7006
7007 return 0;
7008}
7009
7010int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7011{
7012 int r = 0;
7013
7014 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
7015 if (r)
7016 return r;
7017
7018 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
7019 return r;
7020}
7021
7022int cik_uvd_resume(struct radeon_device *rdev)
7023{
7024 uint64_t addr;
7025 uint32_t size;
7026 int r;
7027
7028 r = radeon_uvd_resume(rdev);
7029 if (r)
7030 return r;
7031
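	/*
	 * Offsets and sizes below are programmed in 8-byte units (hence the
	 * >> 3): region 0 holds the UVD firmware image, region 1 the stack and
	 * region 2 the heap, laid out back to back in VRAM.
	 */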
7032 /* program the VCPU memory controller bits 0-27 */
7033 addr = rdev->uvd.gpu_addr >> 3;
Christian König4ad9c1c2013-08-05 14:10:55 +02007034 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
Christian König87167bb2013-04-09 13:39:21 -04007035 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
7036 WREG32(UVD_VCPU_CACHE_SIZE0, size);
7037
7038 addr += size;
7039 size = RADEON_UVD_STACK_SIZE >> 3;
7040 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
7041 WREG32(UVD_VCPU_CACHE_SIZE1, size);
7042
7043 addr += size;
7044 size = RADEON_UVD_HEAP_SIZE >> 3;
7045 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
7046 WREG32(UVD_VCPU_CACHE_SIZE2, size);
7047
7048 /* bits 28-31 */
7049 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7050 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7051
7052 /* bits 32-39 */
7053 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7054 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7055
7056 return 0;
7057}
Alex Deucher8a7cd272013-08-06 11:29:39 -04007058
7059static void cik_pcie_gen3_enable(struct radeon_device *rdev)
7060{
7061 struct pci_dev *root = rdev->pdev->bus->self;
7062 int bridge_pos, gpu_pos;
7063 u32 speed_cntl, mask, current_data_rate;
7064 int ret, i;
7065 u16 tmp16;
7066
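	/*
	 * Nothing to do if higher link speeds are disabled with the
	 * radeon.pcie_gen2 module parameter, on integrated or non-PCIe parts,
	 * or when the platform does not advertise gen2/gen3 speeds.
	 */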
7067 if (radeon_pcie_gen2 == 0)
7068 return;
7069
7070 if (rdev->flags & RADEON_IS_IGP)
7071 return;
7072
7073 if (!(rdev->flags & RADEON_IS_PCIE))
7074 return;
7075
7076 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7077 if (ret != 0)
7078 return;
7079
7080 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7081 return;
7082
7083 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7084 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7085 LC_CURRENT_DATA_RATE_SHIFT;
7086 if (mask & DRM_PCIE_SPEED_80) {
7087 if (current_data_rate == 2) {
7088 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7089 return;
7090 }
7091 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7092 } else if (mask & DRM_PCIE_SPEED_50) {
7093 if (current_data_rate == 1) {
7094 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7095 return;
7096 }
7097 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7098 }
7099
7100 bridge_pos = pci_pcie_cap(root);
7101 if (!bridge_pos)
7102 return;
7103
7104 gpu_pos = pci_pcie_cap(rdev->pdev);
7105 if (!gpu_pos)
7106 return;
7107
7108 if (mask & DRM_PCIE_SPEED_80) {
7109 /* re-try equalization if gen3 is not already enabled */
7110 if (current_data_rate != 2) {
7111 u16 bridge_cfg, gpu_cfg;
7112 u16 bridge_cfg2, gpu_cfg2;
7113 u32 max_lw, current_lw, tmp;
7114
7115 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7116 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7117
7118 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7119 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7120
7121 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7122 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7123
7124 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
7125 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7126 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7127
7128 if (current_lw < max_lw) {
7129 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7130 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7131 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7132 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7133 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7134 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7135 }
7136 }
7137
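			/*
			 * Each pass saves the bridge and GPU link control
			 * settings, quiesces the link, asks for equalization
			 * to be redone and then restores the saved settings.
			 */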
7138 for (i = 0; i < 10; i++) {
7139 /* check status */
7140 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7141 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7142 break;
7143
7144 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7145 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7146
7147 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7148 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7149
7150 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7151 tmp |= LC_SET_QUIESCE;
7152 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7153
7154 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7155 tmp |= LC_REDO_EQ;
7156 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7157
7158 mdelay(100);
7159
7160 /* linkctl */
7161 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7162 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7163 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7164 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7165
7166 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7167 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7168 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7169 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7170
7171 /* linkctl2 */
7172 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7173 tmp16 &= ~((1 << 4) | (7 << 9));
7174 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7175 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7176
7177 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7178 tmp16 &= ~((1 << 4) | (7 << 9));
7179 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7180 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7181
7182 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7183 tmp &= ~LC_SET_QUIESCE;
7184 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7185 }
7186 }
7187 }
7188
7189 /* set the link speed */
7190 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7191 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7192 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7193
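	/*
	 * Set the target link speed field in the GPU's Link Control 2 register
	 * to the fastest rate available in the speed cap mask obtained above.
	 */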
7194 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7195 tmp16 &= ~0xf;
7196 if (mask & DRM_PCIE_SPEED_80)
7197 tmp16 |= 3; /* gen3 */
7198 else if (mask & DRM_PCIE_SPEED_50)
7199 tmp16 |= 2; /* gen2 */
7200 else
7201 tmp16 |= 1; /* gen1 */
7202 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7203
7204 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7205 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7206 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7207
7208 for (i = 0; i < rdev->usec_timeout; i++) {
7209 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7210 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7211 break;
7212 udelay(1);
7213 }
7214}