/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

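/* The MODULE_FIRMWARE() lines above only declare which microcode blobs this
 * driver may request at runtime; nothing is loaded here.  Tools that scan
 * module metadata (initramfs generators, for example) use these strings to
 * know which files under /lib/firmware/radeon/ need to be bundled.
 */
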
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

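/* These helpers implement the usual index/data pattern for an indirect
 * register space: write the offset to PCIE_INDEX, read it back to make sure
 * the write has posted, then access PCIE_DATA.  Callers normally reach them
 * through the rdev->pciep_rreg/pciep_wreg hooks (RREG32_PCIE_PORT()/
 * WREG32_PCIE_PORT() style wrappers); those wrapper names are assumptions
 * based on the rest of the radeon driver, not something defined in this file.
 */
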
 86static const u32 bonaire_golden_spm_registers[] =
87{
88 0x30800, 0xe0ffffff, 0xe0000000
89};
90
91static const u32 bonaire_golden_common_registers[] =
92{
93 0xc770, 0xffffffff, 0x00000800,
94 0xc774, 0xffffffff, 0x00000800,
95 0xc798, 0xffffffff, 0x00007fbf,
96 0xc79c, 0xffffffff, 0x00007faf
97};
98
99static const u32 bonaire_golden_registers[] =
100{
101 0x3354, 0x00000333, 0x00000333,
102 0x3350, 0x000c0fc0, 0x00040200,
103 0x9a10, 0x00010000, 0x00058208,
104 0x3c000, 0xffff1fff, 0x00140000,
105 0x3c200, 0xfdfc0fff, 0x00000100,
106 0x3c234, 0x40000000, 0x40000200,
107 0x9830, 0xffffffff, 0x00000000,
108 0x9834, 0xf00fffff, 0x00000400,
109 0x9838, 0x0002021c, 0x00020200,
110 0xc78, 0x00000080, 0x00000000,
111 0x5bb0, 0x000000f0, 0x00000070,
112 0x5bc0, 0xf0311fff, 0x80300000,
113 0x98f8, 0x73773777, 0x12010001,
114 0x350c, 0x00810000, 0x408af000,
115 0x7030, 0x31000111, 0x00000011,
116 0x2f48, 0x73773777, 0x12010001,
117 0x220c, 0x00007fb6, 0x0021a1b1,
118 0x2210, 0x00007fb6, 0x002021b1,
119 0x2180, 0x00007fb6, 0x00002191,
120 0x2218, 0x00007fb6, 0x002121b1,
121 0x221c, 0x00007fb6, 0x002021b1,
122 0x21dc, 0x00007fb6, 0x00002191,
123 0x21e0, 0x00007fb6, 0x00002191,
124 0x3628, 0x0000003f, 0x0000000a,
125 0x362c, 0x0000003f, 0x0000000a,
126 0x2ae4, 0x00073ffe, 0x000022a2,
127 0x240c, 0x000007ff, 0x00000000,
128 0x8a14, 0xf000003f, 0x00000007,
129 0x8bf0, 0x00002001, 0x00000001,
130 0x8b24, 0xffffffff, 0x00ffffff,
131 0x30a04, 0x0000ff0f, 0x00000000,
132 0x28a4c, 0x07ffffff, 0x06000000,
133 0x4d8, 0x00000fff, 0x00000100,
134 0x3e78, 0x00000001, 0x00000002,
135 0x9100, 0x03000000, 0x0362c688,
136 0x8c00, 0x000000ff, 0x00000001,
137 0xe40, 0x00001fff, 0x00001fff,
138 0x9060, 0x0000007f, 0x00000020,
139 0x9508, 0x00010000, 0x00010000,
140 0xac14, 0x000003ff, 0x000000f3,
141 0xac0c, 0xffffffff, 0x00001032
142};
143
144static const u32 bonaire_mgcg_cgcg_init[] =
145{
146 0xc420, 0xffffffff, 0xfffffffc,
147 0x30800, 0xffffffff, 0xe0000000,
148 0x3c2a0, 0xffffffff, 0x00000100,
149 0x3c208, 0xffffffff, 0x00000100,
150 0x3c2c0, 0xffffffff, 0xc0000100,
151 0x3c2c8, 0xffffffff, 0xc0000100,
152 0x3c2c4, 0xffffffff, 0xc0000100,
153 0x55e4, 0xffffffff, 0x00600100,
154 0x3c280, 0xffffffff, 0x00000100,
155 0x3c214, 0xffffffff, 0x06000100,
156 0x3c220, 0xffffffff, 0x00000100,
157 0x3c218, 0xffffffff, 0x06000100,
158 0x3c204, 0xffffffff, 0x00000100,
159 0x3c2e0, 0xffffffff, 0x00000100,
160 0x3c224, 0xffffffff, 0x00000100,
161 0x3c200, 0xffffffff, 0x00000100,
162 0x3c230, 0xffffffff, 0x00000100,
163 0x3c234, 0xffffffff, 0x00000100,
164 0x3c250, 0xffffffff, 0x00000100,
165 0x3c254, 0xffffffff, 0x00000100,
166 0x3c258, 0xffffffff, 0x00000100,
167 0x3c25c, 0xffffffff, 0x00000100,
168 0x3c260, 0xffffffff, 0x00000100,
169 0x3c27c, 0xffffffff, 0x00000100,
170 0x3c278, 0xffffffff, 0x00000100,
171 0x3c210, 0xffffffff, 0x06000100,
172 0x3c290, 0xffffffff, 0x00000100,
173 0x3c274, 0xffffffff, 0x00000100,
174 0x3c2b4, 0xffffffff, 0x00000100,
175 0x3c2b0, 0xffffffff, 0x00000100,
176 0x3c270, 0xffffffff, 0x00000100,
177 0x30800, 0xffffffff, 0xe0000000,
178 0x3c020, 0xffffffff, 0x00010000,
179 0x3c024, 0xffffffff, 0x00030002,
180 0x3c028, 0xffffffff, 0x00040007,
181 0x3c02c, 0xffffffff, 0x00060005,
182 0x3c030, 0xffffffff, 0x00090008,
183 0x3c034, 0xffffffff, 0x00010000,
184 0x3c038, 0xffffffff, 0x00030002,
185 0x3c03c, 0xffffffff, 0x00040007,
186 0x3c040, 0xffffffff, 0x00060005,
187 0x3c044, 0xffffffff, 0x00090008,
188 0x3c048, 0xffffffff, 0x00010000,
189 0x3c04c, 0xffffffff, 0x00030002,
190 0x3c050, 0xffffffff, 0x00040007,
191 0x3c054, 0xffffffff, 0x00060005,
192 0x3c058, 0xffffffff, 0x00090008,
193 0x3c05c, 0xffffffff, 0x00010000,
194 0x3c060, 0xffffffff, 0x00030002,
195 0x3c064, 0xffffffff, 0x00040007,
196 0x3c068, 0xffffffff, 0x00060005,
197 0x3c06c, 0xffffffff, 0x00090008,
198 0x3c070, 0xffffffff, 0x00010000,
199 0x3c074, 0xffffffff, 0x00030002,
200 0x3c078, 0xffffffff, 0x00040007,
201 0x3c07c, 0xffffffff, 0x00060005,
202 0x3c080, 0xffffffff, 0x00090008,
203 0x3c084, 0xffffffff, 0x00010000,
204 0x3c088, 0xffffffff, 0x00030002,
205 0x3c08c, 0xffffffff, 0x00040007,
206 0x3c090, 0xffffffff, 0x00060005,
207 0x3c094, 0xffffffff, 0x00090008,
208 0x3c098, 0xffffffff, 0x00010000,
209 0x3c09c, 0xffffffff, 0x00030002,
210 0x3c0a0, 0xffffffff, 0x00040007,
211 0x3c0a4, 0xffffffff, 0x00060005,
212 0x3c0a8, 0xffffffff, 0x00090008,
213 0x3c000, 0xffffffff, 0x96e00200,
214 0x8708, 0xffffffff, 0x00900100,
215 0xc424, 0xffffffff, 0x0020003f,
216 0x38, 0xffffffff, 0x0140001c,
217 0x3c, 0x000f0000, 0x000f0000,
218 0x220, 0xffffffff, 0xC060000C,
219 0x224, 0xc0000fff, 0x00000100,
220 0xf90, 0xffffffff, 0x00000100,
221 0xf98, 0x00000101, 0x00000000,
222 0x20a8, 0xffffffff, 0x00000104,
223 0x55e4, 0xff000fff, 0x00000100,
224 0x30cc, 0xc0000fff, 0x00000104,
225 0xc1e4, 0x00000001, 0x00000001,
226 0xd00c, 0xff000ff0, 0x00000100,
227 0xd80c, 0xff000ff0, 0x00000100
228};
229
230static const u32 spectre_golden_spm_registers[] =
231{
232 0x30800, 0xe0ffffff, 0xe0000000
233};
234
235static const u32 spectre_golden_common_registers[] =
236{
237 0xc770, 0xffffffff, 0x00000800,
238 0xc774, 0xffffffff, 0x00000800,
239 0xc798, 0xffffffff, 0x00007fbf,
240 0xc79c, 0xffffffff, 0x00007faf
241};
242
243static const u32 spectre_golden_registers[] =
244{
245 0x3c000, 0xffff1fff, 0x96940200,
246 0x3c00c, 0xffff0001, 0xff000000,
247 0x3c200, 0xfffc0fff, 0x00000100,
248 0x6ed8, 0x00010101, 0x00010000,
249 0x9834, 0xf00fffff, 0x00000400,
250 0x9838, 0xfffffffc, 0x00020200,
251 0x5bb0, 0x000000f0, 0x00000070,
252 0x5bc0, 0xf0311fff, 0x80300000,
253 0x98f8, 0x73773777, 0x12010001,
254 0x9b7c, 0x00ff0000, 0x00fc0000,
255 0x2f48, 0x73773777, 0x12010001,
256 0x8a14, 0xf000003f, 0x00000007,
257 0x8b24, 0xffffffff, 0x00ffffff,
258 0x28350, 0x3f3f3fff, 0x00000082,
259 0x28355, 0x0000003f, 0x00000000,
260 0x3e78, 0x00000001, 0x00000002,
261 0x913c, 0xffff03df, 0x00000004,
262 0xc768, 0x00000008, 0x00000008,
263 0x8c00, 0x000008ff, 0x00000800,
264 0x9508, 0x00010000, 0x00010000,
265 0xac0c, 0xffffffff, 0x54763210,
266 0x214f8, 0x01ff01ff, 0x00000002,
267 0x21498, 0x007ff800, 0x00200000,
268 0x2015c, 0xffffffff, 0x00000f40,
269 0x30934, 0xffffffff, 0x00000001
270};
271
272static const u32 spectre_mgcg_cgcg_init[] =
273{
274 0xc420, 0xffffffff, 0xfffffffc,
275 0x30800, 0xffffffff, 0xe0000000,
276 0x3c2a0, 0xffffffff, 0x00000100,
277 0x3c208, 0xffffffff, 0x00000100,
278 0x3c2c0, 0xffffffff, 0x00000100,
279 0x3c2c8, 0xffffffff, 0x00000100,
280 0x3c2c4, 0xffffffff, 0x00000100,
281 0x55e4, 0xffffffff, 0x00600100,
282 0x3c280, 0xffffffff, 0x00000100,
283 0x3c214, 0xffffffff, 0x06000100,
284 0x3c220, 0xffffffff, 0x00000100,
285 0x3c218, 0xffffffff, 0x06000100,
286 0x3c204, 0xffffffff, 0x00000100,
287 0x3c2e0, 0xffffffff, 0x00000100,
288 0x3c224, 0xffffffff, 0x00000100,
289 0x3c200, 0xffffffff, 0x00000100,
290 0x3c230, 0xffffffff, 0x00000100,
291 0x3c234, 0xffffffff, 0x00000100,
292 0x3c250, 0xffffffff, 0x00000100,
293 0x3c254, 0xffffffff, 0x00000100,
294 0x3c258, 0xffffffff, 0x00000100,
295 0x3c25c, 0xffffffff, 0x00000100,
296 0x3c260, 0xffffffff, 0x00000100,
297 0x3c27c, 0xffffffff, 0x00000100,
298 0x3c278, 0xffffffff, 0x00000100,
299 0x3c210, 0xffffffff, 0x06000100,
300 0x3c290, 0xffffffff, 0x00000100,
301 0x3c274, 0xffffffff, 0x00000100,
302 0x3c2b4, 0xffffffff, 0x00000100,
303 0x3c2b0, 0xffffffff, 0x00000100,
304 0x3c270, 0xffffffff, 0x00000100,
305 0x30800, 0xffffffff, 0xe0000000,
306 0x3c020, 0xffffffff, 0x00010000,
307 0x3c024, 0xffffffff, 0x00030002,
308 0x3c028, 0xffffffff, 0x00040007,
309 0x3c02c, 0xffffffff, 0x00060005,
310 0x3c030, 0xffffffff, 0x00090008,
311 0x3c034, 0xffffffff, 0x00010000,
312 0x3c038, 0xffffffff, 0x00030002,
313 0x3c03c, 0xffffffff, 0x00040007,
314 0x3c040, 0xffffffff, 0x00060005,
315 0x3c044, 0xffffffff, 0x00090008,
316 0x3c048, 0xffffffff, 0x00010000,
317 0x3c04c, 0xffffffff, 0x00030002,
318 0x3c050, 0xffffffff, 0x00040007,
319 0x3c054, 0xffffffff, 0x00060005,
320 0x3c058, 0xffffffff, 0x00090008,
321 0x3c05c, 0xffffffff, 0x00010000,
322 0x3c060, 0xffffffff, 0x00030002,
323 0x3c064, 0xffffffff, 0x00040007,
324 0x3c068, 0xffffffff, 0x00060005,
325 0x3c06c, 0xffffffff, 0x00090008,
326 0x3c070, 0xffffffff, 0x00010000,
327 0x3c074, 0xffffffff, 0x00030002,
328 0x3c078, 0xffffffff, 0x00040007,
329 0x3c07c, 0xffffffff, 0x00060005,
330 0x3c080, 0xffffffff, 0x00090008,
331 0x3c084, 0xffffffff, 0x00010000,
332 0x3c088, 0xffffffff, 0x00030002,
333 0x3c08c, 0xffffffff, 0x00040007,
334 0x3c090, 0xffffffff, 0x00060005,
335 0x3c094, 0xffffffff, 0x00090008,
336 0x3c098, 0xffffffff, 0x00010000,
337 0x3c09c, 0xffffffff, 0x00030002,
338 0x3c0a0, 0xffffffff, 0x00040007,
339 0x3c0a4, 0xffffffff, 0x00060005,
340 0x3c0a8, 0xffffffff, 0x00090008,
341 0x3c0ac, 0xffffffff, 0x00010000,
342 0x3c0b0, 0xffffffff, 0x00030002,
343 0x3c0b4, 0xffffffff, 0x00040007,
344 0x3c0b8, 0xffffffff, 0x00060005,
345 0x3c0bc, 0xffffffff, 0x00090008,
346 0x3c000, 0xffffffff, 0x96e00200,
347 0x8708, 0xffffffff, 0x00900100,
348 0xc424, 0xffffffff, 0x0020003f,
349 0x38, 0xffffffff, 0x0140001c,
350 0x3c, 0x000f0000, 0x000f0000,
351 0x220, 0xffffffff, 0xC060000C,
352 0x224, 0xc0000fff, 0x00000100,
353 0xf90, 0xffffffff, 0x00000100,
354 0xf98, 0x00000101, 0x00000000,
355 0x20a8, 0xffffffff, 0x00000104,
356 0x55e4, 0xff000fff, 0x00000100,
357 0x30cc, 0xc0000fff, 0x00000104,
358 0xc1e4, 0x00000001, 0x00000001,
359 0xd00c, 0xff000ff0, 0x00000100,
360 0xd80c, 0xff000ff0, 0x00000100
361};
362
363static const u32 kalindi_golden_spm_registers[] =
364{
365 0x30800, 0xe0ffffff, 0xe0000000
366};
367
368static const u32 kalindi_golden_common_registers[] =
369{
370 0xc770, 0xffffffff, 0x00000800,
371 0xc774, 0xffffffff, 0x00000800,
372 0xc798, 0xffffffff, 0x00007fbf,
373 0xc79c, 0xffffffff, 0x00007faf
374};
375
376static const u32 kalindi_golden_registers[] =
377{
378 0x3c000, 0xffffdfff, 0x6e944040,
379 0x55e4, 0xff607fff, 0xfc000100,
380 0x3c220, 0xff000fff, 0x00000100,
381 0x3c224, 0xff000fff, 0x00000100,
382 0x3c200, 0xfffc0fff, 0x00000100,
383 0x6ed8, 0x00010101, 0x00010000,
384 0x9830, 0xffffffff, 0x00000000,
385 0x9834, 0xf00fffff, 0x00000400,
386 0x5bb0, 0x000000f0, 0x00000070,
387 0x5bc0, 0xf0311fff, 0x80300000,
388 0x98f8, 0x73773777, 0x12010001,
389 0x98fc, 0xffffffff, 0x00000010,
390 0x9b7c, 0x00ff0000, 0x00fc0000,
391 0x8030, 0x00001f0f, 0x0000100a,
392 0x2f48, 0x73773777, 0x12010001,
393 0x2408, 0x000fffff, 0x000c007f,
394 0x8a14, 0xf000003f, 0x00000007,
395 0x8b24, 0x3fff3fff, 0x00ffcfff,
396 0x30a04, 0x0000ff0f, 0x00000000,
397 0x28a4c, 0x07ffffff, 0x06000000,
398 0x4d8, 0x00000fff, 0x00000100,
399 0x3e78, 0x00000001, 0x00000002,
400 0xc768, 0x00000008, 0x00000008,
401 0x8c00, 0x000000ff, 0x00000003,
402 0x214f8, 0x01ff01ff, 0x00000002,
403 0x21498, 0x007ff800, 0x00200000,
404 0x2015c, 0xffffffff, 0x00000f40,
405 0x88c4, 0x001f3ae3, 0x00000082,
406 0x88d4, 0x0000001f, 0x00000010,
407 0x30934, 0xffffffff, 0x00000000
408};
409
410static const u32 kalindi_mgcg_cgcg_init[] =
411{
412 0xc420, 0xffffffff, 0xfffffffc,
413 0x30800, 0xffffffff, 0xe0000000,
414 0x3c2a0, 0xffffffff, 0x00000100,
415 0x3c208, 0xffffffff, 0x00000100,
416 0x3c2c0, 0xffffffff, 0x00000100,
417 0x3c2c8, 0xffffffff, 0x00000100,
418 0x3c2c4, 0xffffffff, 0x00000100,
419 0x55e4, 0xffffffff, 0x00600100,
420 0x3c280, 0xffffffff, 0x00000100,
421 0x3c214, 0xffffffff, 0x06000100,
422 0x3c220, 0xffffffff, 0x00000100,
423 0x3c218, 0xffffffff, 0x06000100,
424 0x3c204, 0xffffffff, 0x00000100,
425 0x3c2e0, 0xffffffff, 0x00000100,
426 0x3c224, 0xffffffff, 0x00000100,
427 0x3c200, 0xffffffff, 0x00000100,
428 0x3c230, 0xffffffff, 0x00000100,
429 0x3c234, 0xffffffff, 0x00000100,
430 0x3c250, 0xffffffff, 0x00000100,
431 0x3c254, 0xffffffff, 0x00000100,
432 0x3c258, 0xffffffff, 0x00000100,
433 0x3c25c, 0xffffffff, 0x00000100,
434 0x3c260, 0xffffffff, 0x00000100,
435 0x3c27c, 0xffffffff, 0x00000100,
436 0x3c278, 0xffffffff, 0x00000100,
437 0x3c210, 0xffffffff, 0x06000100,
438 0x3c290, 0xffffffff, 0x00000100,
439 0x3c274, 0xffffffff, 0x00000100,
440 0x3c2b4, 0xffffffff, 0x00000100,
441 0x3c2b0, 0xffffffff, 0x00000100,
442 0x3c270, 0xffffffff, 0x00000100,
443 0x30800, 0xffffffff, 0xe0000000,
444 0x3c020, 0xffffffff, 0x00010000,
445 0x3c024, 0xffffffff, 0x00030002,
446 0x3c028, 0xffffffff, 0x00040007,
447 0x3c02c, 0xffffffff, 0x00060005,
448 0x3c030, 0xffffffff, 0x00090008,
449 0x3c034, 0xffffffff, 0x00010000,
450 0x3c038, 0xffffffff, 0x00030002,
451 0x3c03c, 0xffffffff, 0x00040007,
452 0x3c040, 0xffffffff, 0x00060005,
453 0x3c044, 0xffffffff, 0x00090008,
454 0x3c000, 0xffffffff, 0x96e00200,
455 0x8708, 0xffffffff, 0x00900100,
456 0xc424, 0xffffffff, 0x0020003f,
457 0x38, 0xffffffff, 0x0140001c,
458 0x3c, 0x000f0000, 0x000f0000,
459 0x220, 0xffffffff, 0xC060000C,
460 0x224, 0xc0000fff, 0x00000100,
461 0x20a8, 0xffffffff, 0x00000104,
462 0x55e4, 0xff000fff, 0x00000100,
463 0x30cc, 0xc0000fff, 0x00000104,
464 0xc1e4, 0x00000001, 0x00000001,
465 0xd00c, 0xff000ff0, 0x00000100,
466 0xd80c, 0xff000ff0, 0x00000100
467};
468
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

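/* "Golden" settings are the recommended power-on register values for each
 * asic.  radeon_program_register_sequence() walks the (offset, mask, value)
 * triples in the tables above and applies them as masked read-modify-write
 * updates (a straight write when the mask is all ones), so only the intended
 * bits are changed.
 */
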
/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

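/* Like the other per-asic clock queries, this is normally reached through the
 * asic dispatch table (a get_xclk hook) so common code stays chip-agnostic;
 * the hook name is assumed from the rest of the radeon driver rather than
 * taken from this file.
 */
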
/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

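/* The doorbell aperture is a separate BAR through which ring write-pointer
 * updates reach the hardware; the compute (MEC) queues on CIK are driven this
 * way.  A wptr update would look roughly like:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 *
 * This is a sketch only -- the struct radeon_ring field names are assumed,
 * not taken from this file.
 */
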
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

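/* SRBM_GFX_CNTL is global state, so callers that switch the active
 * me/pipe/queue/vmid instance are expected to serialize against each other
 * (elsewhere in the CIK code this is done under rdev->srbm_mutex -- an
 * assumption based on how the rest of the driver uses it) and to select the
 * default instance, cik_srbm_select(rdev, 0, 0, 0, 0), when they are done.
 */
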
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

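/* A typical caller requests the images once at init time, roughly:
 *
 *	if (!rdev->pfp_fw) {
 *		r = cik_init_microcode(rdev);
 *		if (r)
 *			return r;
 *	}
 *
 * and then pushes them into the hardware with the *_load_microcode helpers on
 * every init/resume.  The snippet is a sketch of the usual radeon pattern,
 * not code copied from this file.
 */
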
 873/*
874 * Core functions
875 */
876/**
877 * cik_tiling_mode_table_init - init the hw tiling table
878 *
879 * @rdev: radeon_device pointer
880 *
881 * Starting with SI, the tiling setup is done globally in a
882 * set of 32 tiling modes. Rather than selecting each set of
883 * parameters per surface as on older asics, we just select
884 * which index in the tiling table we want to use, and the
885 * surface uses those parameters (CIK).
886 */
887static void cik_tiling_mode_table_init(struct radeon_device *rdev)
888{
889 const u32 num_tile_mode_states = 32;
890 const u32 num_secondary_tile_mode_states = 16;
891 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
892 u32 num_pipe_configs;
893 u32 num_rbs = rdev->config.cik.max_backends_per_se *
894 rdev->config.cik.max_shader_engines;
895
896 switch (rdev->config.cik.mem_row_size_in_kb) {
897 case 1:
898 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
899 break;
900 case 2:
901 default:
902 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
903 break;
904 case 4:
905 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
906 break;
907 }
908
909 num_pipe_configs = rdev->config.cik.max_tile_pipes;
910 if (num_pipe_configs > 8)
911 num_pipe_configs = 8; /* ??? */
912
913 if (num_pipe_configs == 8) {
914 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
915 switch (reg_offset) {
916 case 0:
917 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
918 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
919 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
920 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
921 break;
922 case 1:
923 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
924 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
925 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
926 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
927 break;
928 case 2:
929 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
930 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
931 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
932 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
933 break;
934 case 3:
935 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
936 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
937 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
938 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
939 break;
940 case 4:
941 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
942 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
943 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
944 TILE_SPLIT(split_equal_to_row_size));
945 break;
946 case 5:
947 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
948 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
949 break;
950 case 6:
951 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
952 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
953 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
954 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
955 break;
956 case 7:
957 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
958 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
959 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
960 TILE_SPLIT(split_equal_to_row_size));
961 break;
962 case 8:
963 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
965 break;
966 case 9:
967 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
968 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
969 break;
970 case 10:
971 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
972 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
973 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
974 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
975 break;
976 case 11:
977 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
978 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
979 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
980 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
981 break;
982 case 12:
983 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
984 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
985 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
986 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
987 break;
988 case 13:
989 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
990 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
991 break;
992 case 14:
993 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
994 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
995 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
996 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
997 break;
998 case 16:
999 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1000 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1001 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1002 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1003 break;
1004 case 17:
1005 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1006 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1007 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1008 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1009 break;
1010 case 27:
1011 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1012 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1013 break;
1014 case 28:
1015 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1016 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1017 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1018 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1019 break;
1020 case 29:
1021 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1022 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1023 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1024 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1025 break;
1026 case 30:
1027 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1028 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1029 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1030 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1031 break;
1032 default:
1033 gb_tile_moden = 0;
1034 break;
1035 }
 1036 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
 1037 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1038 }
1039 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1040 switch (reg_offset) {
1041 case 0:
1042 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1043 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1044 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1045 NUM_BANKS(ADDR_SURF_16_BANK));
1046 break;
1047 case 1:
1048 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1049 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1050 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1051 NUM_BANKS(ADDR_SURF_16_BANK));
1052 break;
1053 case 2:
1054 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1055 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1056 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1057 NUM_BANKS(ADDR_SURF_16_BANK));
1058 break;
1059 case 3:
1060 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1061 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1062 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1063 NUM_BANKS(ADDR_SURF_16_BANK));
1064 break;
1065 case 4:
1066 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1067 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1068 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1069 NUM_BANKS(ADDR_SURF_8_BANK));
1070 break;
1071 case 5:
1072 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1073 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1074 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1075 NUM_BANKS(ADDR_SURF_4_BANK));
1076 break;
1077 case 6:
1078 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1079 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1080 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1081 NUM_BANKS(ADDR_SURF_2_BANK));
1082 break;
1083 case 8:
1084 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1085 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1086 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1087 NUM_BANKS(ADDR_SURF_16_BANK));
1088 break;
1089 case 9:
1090 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1091 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1092 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1093 NUM_BANKS(ADDR_SURF_16_BANK));
1094 break;
1095 case 10:
1096 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1097 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1098 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1099 NUM_BANKS(ADDR_SURF_16_BANK));
1100 break;
1101 case 11:
1102 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1103 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1104 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1105 NUM_BANKS(ADDR_SURF_16_BANK));
1106 break;
1107 case 12:
1108 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1109 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1110 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1111 NUM_BANKS(ADDR_SURF_8_BANK));
1112 break;
1113 case 13:
1114 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1115 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1116 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1117 NUM_BANKS(ADDR_SURF_4_BANK));
1118 break;
1119 case 14:
1120 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1121 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1122 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1123 NUM_BANKS(ADDR_SURF_2_BANK));
1124 break;
1125 default:
1126 gb_tile_moden = 0;
1127 break;
1128 }
1129 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1130 }
1131 } else if (num_pipe_configs == 4) {
1132 if (num_rbs == 4) {
1133 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1134 switch (reg_offset) {
1135 case 0:
1136 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1137 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1138 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1139 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1140 break;
1141 case 1:
1142 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1143 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1144 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1145 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1146 break;
1147 case 2:
1148 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1149 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1150 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1151 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1152 break;
1153 case 3:
1154 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1155 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1156 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1157 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1158 break;
1159 case 4:
1160 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1161 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1162 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1163 TILE_SPLIT(split_equal_to_row_size));
1164 break;
1165 case 5:
1166 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1167 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1168 break;
1169 case 6:
1170 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1171 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1172 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1173 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1174 break;
1175 case 7:
1176 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1177 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1178 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1179 TILE_SPLIT(split_equal_to_row_size));
1180 break;
1181 case 8:
1182 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1183 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1184 break;
1185 case 9:
1186 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1187 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1188 break;
1189 case 10:
1190 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1191 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1192 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1194 break;
1195 case 11:
1196 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1197 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1198 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1200 break;
1201 case 12:
1202 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1203 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1204 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1206 break;
1207 case 13:
1208 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1209 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1210 break;
1211 case 14:
1212 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1213 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1214 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1216 break;
1217 case 16:
1218 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1219 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1220 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1222 break;
1223 case 17:
1224 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1225 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1226 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1227 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1228 break;
1229 case 27:
1230 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1231 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1232 break;
1233 case 28:
1234 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1235 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1236 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1238 break;
1239 case 29:
1240 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1241 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1242 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1244 break;
1245 case 30:
1246 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1247 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1248 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1250 break;
1251 default:
1252 gb_tile_moden = 0;
1253 break;
1254 }
 1255 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
 1256 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1257 }
1258 } else if (num_rbs < 4) {
1259 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1260 switch (reg_offset) {
1261 case 0:
1262 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1264 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1265 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1266 break;
1267 case 1:
1268 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1269 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1270 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1272 break;
1273 case 2:
1274 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1275 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1276 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1278 break;
1279 case 3:
1280 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1281 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1282 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1283 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1284 break;
1285 case 4:
1286 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1287 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1288 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1289 TILE_SPLIT(split_equal_to_row_size));
1290 break;
1291 case 5:
1292 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1293 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1294 break;
1295 case 6:
1296 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1298 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1299 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1300 break;
1301 case 7:
1302 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1303 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1304 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1305 TILE_SPLIT(split_equal_to_row_size));
1306 break;
1307 case 8:
1308 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1309 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1310 break;
1311 case 9:
1312 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1313 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1314 break;
1315 case 10:
1316 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1317 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1318 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1320 break;
1321 case 11:
1322 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1323 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1324 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1326 break;
1327 case 12:
1328 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1329 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1330 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1332 break;
1333 case 13:
1334 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1335 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1336 break;
1337 case 14:
1338 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1340 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1342 break;
1343 case 16:
1344 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1346 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1348 break;
1349 case 17:
1350 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1351 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1352 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1354 break;
1355 case 27:
1356 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1357 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1358 break;
1359 case 28:
1360 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1361 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1362 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1364 break;
1365 case 29:
1366 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1367 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1368 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1370 break;
1371 case 30:
1372 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1373 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1374 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1376 break;
1377 default:
1378 gb_tile_moden = 0;
1379 break;
1380 }
 1381 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
 1382 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1383 }
1384 }
1385 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1386 switch (reg_offset) {
1387 case 0:
1388 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1391 NUM_BANKS(ADDR_SURF_16_BANK));
1392 break;
1393 case 1:
1394 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1397 NUM_BANKS(ADDR_SURF_16_BANK));
1398 break;
1399 case 2:
1400 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1403 NUM_BANKS(ADDR_SURF_16_BANK));
1404 break;
1405 case 3:
1406 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1409 NUM_BANKS(ADDR_SURF_16_BANK));
1410 break;
1411 case 4:
1412 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1415 NUM_BANKS(ADDR_SURF_16_BANK));
1416 break;
1417 case 5:
1418 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1421 NUM_BANKS(ADDR_SURF_8_BANK));
1422 break;
1423 case 6:
1424 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1425 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1426 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1427 NUM_BANKS(ADDR_SURF_4_BANK));
1428 break;
1429 case 8:
1430 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1433 NUM_BANKS(ADDR_SURF_16_BANK));
1434 break;
1435 case 9:
1436 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1437 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1438 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1439 NUM_BANKS(ADDR_SURF_16_BANK));
1440 break;
1441 case 10:
1442 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1443 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1444 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1445 NUM_BANKS(ADDR_SURF_16_BANK));
1446 break;
1447 case 11:
1448 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1451 NUM_BANKS(ADDR_SURF_16_BANK));
1452 break;
1453 case 12:
1454 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1457 NUM_BANKS(ADDR_SURF_16_BANK));
1458 break;
1459 case 13:
1460 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1463 NUM_BANKS(ADDR_SURF_8_BANK));
1464 break;
1465 case 14:
1466 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1469 NUM_BANKS(ADDR_SURF_4_BANK));
1470 break;
1471 default:
1472 gb_tile_moden = 0;
1473 break;
1474 }
1475 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1476 }
1477 } else if (num_pipe_configs == 2) {
1478 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1479 switch (reg_offset) {
1480 case 0:
1481 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1482 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1483 PIPE_CONFIG(ADDR_SURF_P2) |
1484 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1485 break;
1486 case 1:
1487 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1488 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1489 PIPE_CONFIG(ADDR_SURF_P2) |
1490 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1491 break;
1492 case 2:
1493 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1494 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1495 PIPE_CONFIG(ADDR_SURF_P2) |
1496 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1497 break;
1498 case 3:
1499 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1500 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1501 PIPE_CONFIG(ADDR_SURF_P2) |
1502 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1503 break;
1504 case 4:
1505 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1506 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1507 PIPE_CONFIG(ADDR_SURF_P2) |
1508 TILE_SPLIT(split_equal_to_row_size));
1509 break;
1510 case 5:
1511 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1512 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1513 break;
1514 case 6:
1515 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1516 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1517 PIPE_CONFIG(ADDR_SURF_P2) |
1518 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1519 break;
1520 case 7:
1521 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1523 PIPE_CONFIG(ADDR_SURF_P2) |
1524 TILE_SPLIT(split_equal_to_row_size));
1525 break;
1526 case 8:
1527 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1528 break;
1529 case 9:
1530 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1531 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1532 break;
1533 case 10:
1534 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1535 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1536 PIPE_CONFIG(ADDR_SURF_P2) |
1537 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1538 break;
1539 case 11:
1540 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1541 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1542 PIPE_CONFIG(ADDR_SURF_P2) |
1543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1544 break;
1545 case 12:
1546 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1547 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1548 PIPE_CONFIG(ADDR_SURF_P2) |
1549 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1550 break;
1551 case 13:
1552 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1553 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1554 break;
1555 case 14:
1556 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1557 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1558 PIPE_CONFIG(ADDR_SURF_P2) |
1559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1560 break;
1561 case 16:
1562 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1564 PIPE_CONFIG(ADDR_SURF_P2) |
1565 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1566 break;
1567 case 17:
1568 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1569 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1570 PIPE_CONFIG(ADDR_SURF_P2) |
1571 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1572 break;
1573 case 27:
1574 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1575 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1576 break;
1577 case 28:
1578 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1579 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1580 PIPE_CONFIG(ADDR_SURF_P2) |
1581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1582 break;
1583 case 29:
1584 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1585 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1586 PIPE_CONFIG(ADDR_SURF_P2) |
1587 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1588 break;
1589 case 30:
1590 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1591 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1592 PIPE_CONFIG(ADDR_SURF_P2) |
1593 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1594 break;
1595 default:
1596 gb_tile_moden = 0;
1597 break;
1598 }
1599 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1600 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1601 }
1602 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1603 switch (reg_offset) {
1604 case 0:
1605 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1608 NUM_BANKS(ADDR_SURF_16_BANK));
1609 break;
1610 case 1:
1611 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1612 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1613 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1614 NUM_BANKS(ADDR_SURF_16_BANK));
1615 break;
1616 case 2:
1617 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1618 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1619 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1620 NUM_BANKS(ADDR_SURF_16_BANK));
1621 break;
1622 case 3:
1623 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1624 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1625 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1626 NUM_BANKS(ADDR_SURF_16_BANK));
1627 break;
1628 case 4:
1629 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1630 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1631 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1632 NUM_BANKS(ADDR_SURF_16_BANK));
1633 break;
1634 case 5:
1635 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1636 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1637 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1638 NUM_BANKS(ADDR_SURF_16_BANK));
1639 break;
1640 case 6:
1641 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1642 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1643 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1644 NUM_BANKS(ADDR_SURF_8_BANK));
1645 break;
1646 case 8:
1647 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1648 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1649 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1650 NUM_BANKS(ADDR_SURF_16_BANK));
1651 break;
1652 case 9:
1653 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1654 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1655 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1656 NUM_BANKS(ADDR_SURF_16_BANK));
1657 break;
1658 case 10:
1659 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1660 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1661 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1662 NUM_BANKS(ADDR_SURF_16_BANK));
1663 break;
1664 case 11:
1665 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1666 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1667 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1668 NUM_BANKS(ADDR_SURF_16_BANK));
1669 break;
1670 case 12:
1671 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1672 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1673 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1674 NUM_BANKS(ADDR_SURF_16_BANK));
1675 break;
1676 case 13:
1677 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1678 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1679 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1680 NUM_BANKS(ADDR_SURF_16_BANK));
1681 break;
1682 case 14:
1683 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1684 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1685 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1686 NUM_BANKS(ADDR_SURF_8_BANK));
1687 break;
1688 default:
1689 gb_tile_moden = 0;
1690 break;
1691 }
1692 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1693 }
1694 } else
1695 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1696}
1697
1698/**
1699 * cik_select_se_sh - select which SE, SH to address
1700 *
1701 * @rdev: radeon_device pointer
1702 * @se_num: shader engine to address
1703 * @sh_num: sh block to address
1704 *
1705 * Select which SE, SH combinations to address. Certain
1706 * registers are instanced per SE or SH. 0xffffffff means
1707 * broadcast to all SEs or SHs (CIK).
1708 */
1709static void cik_select_se_sh(struct radeon_device *rdev,
1710 u32 se_num, u32 sh_num)
1711{
1712 u32 data = INSTANCE_BROADCAST_WRITES;
1713
1714 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1715 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1716 else if (se_num == 0xffffffff)
1717 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1718 else if (sh_num == 0xffffffff)
1719 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1720 else
1721 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1722 WREG32(GRBM_GFX_INDEX, data);
1723}
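/*
 * Illustrative usage (mirrors cik_setup_rb() below): select one SE/SH
 * instance before reading an instanced register, then restore broadcast
 * so later writes reach every instance:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */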
1724
1725/**
1726 * cik_create_bitmask - create a bitmask
1727 *
1728 * @bit_width: length of the mask
1729 *
1730 * create a variable length bit mask (CIK).
1731 * Returns the bitmask.
1732 */
1733static u32 cik_create_bitmask(u32 bit_width)
1734{
1735 u32 i, mask = 0;
1736
1737 for (i = 0; i < bit_width; i++) {
1738 mask <<= 1;
1739 mask |= 1;
1740 }
1741 return mask;
1742}
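/*
 * For example, cik_create_bitmask(4) returns 0xf.  For bit_width < 32 this
 * is equivalent to ((1 << bit_width) - 1); the loop form also yields
 * 0xffffffff for bit_width == 32 without relying on a 32-bit shift.
 */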
1743
1744/**
1745 * cik_get_rb_disabled - get the disabled render backend bitmask
1746 *
1747 * @rdev: radeon_device pointer
1748 * @max_rb_num: max RBs (render backends) for the asic
1749 * @se_num: number of SEs (shader engines) for the asic
1750 * @sh_per_se: number of SH blocks per SE for the asic
1751 *
1752 * Calculates the bitmask of disabled RBs (CIK).
1753 * Returns the disabled RB bitmask.
1754 */
1755static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1756 u32 max_rb_num, u32 se_num,
1757 u32 sh_per_se)
1758{
1759 u32 data, mask;
1760
1761 data = RREG32(CC_RB_BACKEND_DISABLE);
1762 if (data & 1)
1763 data &= BACKEND_DISABLE_MASK;
1764 else
1765 data = 0;
1766 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1767
1768 data >>= BACKEND_DISABLE_SHIFT;
1769
1770 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1771
1772 return data & mask;
1773}
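/*
 * For example, with max_rb_num = 4, se_num = 2 and sh_per_se = 1 the mask
 * is cik_create_bitmask(4 / 2 / 1) = 0x3, i.e. two candidate RBs per SE/SH,
 * and only the disable bits for those two RBs are returned.
 */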
1774
1775/**
1776 * cik_setup_rb - setup the RBs on the asic
1777 *
1778 * @rdev: radeon_device pointer
1779 * @se_num: number of SEs (shader engines) for the asic
1780 * @sh_per_se: number of SH blocks per SE for the asic
1781 * @max_rb_num: max RBs (render backends) for the asic
1782 *
1783 * Configures per-SE/SH RB registers (CIK).
1784 */
1785static void cik_setup_rb(struct radeon_device *rdev,
1786 u32 se_num, u32 sh_per_se,
1787 u32 max_rb_num)
1788{
1789 int i, j;
1790 u32 data, mask;
1791 u32 disabled_rbs = 0;
1792 u32 enabled_rbs = 0;
1793
1794 for (i = 0; i < se_num; i++) {
1795 for (j = 0; j < sh_per_se; j++) {
1796 cik_select_se_sh(rdev, i, j);
1797 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1798 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1799 }
1800 }
1801 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1802
1803 mask = 1;
1804 for (i = 0; i < max_rb_num; i++) {
1805 if (!(disabled_rbs & mask))
1806 enabled_rbs |= mask;
1807 mask <<= 1;
1808 }
1809
1810 for (i = 0; i < se_num; i++) {
1811 cik_select_se_sh(rdev, i, 0xffffffff);
1812 data = 0;
1813 for (j = 0; j < sh_per_se; j++) {
1814 switch (enabled_rbs & 3) {
1815 case 1:
1816 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1817 break;
1818 case 2:
1819 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1820 break;
1821 case 3:
1822 default:
1823 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1824 break;
1825 }
1826 enabled_rbs >>= 2;
1827 }
1828 WREG32(PA_SC_RASTER_CONFIG, data);
1829 }
1830 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1831}
1832
1833/**
1834 * cik_gpu_init - setup the 3D engine
1835 *
1836 * @rdev: radeon_device pointer
1837 *
1838 * Configures the 3D engine and tiling configuration
1839 * registers so that the 3D engine is usable.
1840 */
1841static void cik_gpu_init(struct radeon_device *rdev)
1842{
1843 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1844 u32 mc_shared_chmap, mc_arb_ramcfg;
1845 u32 hdp_host_path_cntl;
1846 u32 tmp;
1847 int i, j;
1848
1849 switch (rdev->family) {
1850 case CHIP_BONAIRE:
1851 rdev->config.cik.max_shader_engines = 2;
1852 rdev->config.cik.max_tile_pipes = 4;
1853 rdev->config.cik.max_cu_per_sh = 7;
1854 rdev->config.cik.max_sh_per_se = 1;
1855 rdev->config.cik.max_backends_per_se = 2;
1856 rdev->config.cik.max_texture_channel_caches = 4;
1857 rdev->config.cik.max_gprs = 256;
1858 rdev->config.cik.max_gs_threads = 32;
1859 rdev->config.cik.max_hw_contexts = 8;
1860
1861 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1862 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1863 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1864 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1865 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1866 break;
1867 case CHIP_KAVERI:
1868 /* TODO */
1869 break;
1870 case CHIP_KABINI:
1871 default:
1872 rdev->config.cik.max_shader_engines = 1;
1873 rdev->config.cik.max_tile_pipes = 2;
1874 rdev->config.cik.max_cu_per_sh = 2;
1875 rdev->config.cik.max_sh_per_se = 1;
1876 rdev->config.cik.max_backends_per_se = 1;
1877 rdev->config.cik.max_texture_channel_caches = 2;
1878 rdev->config.cik.max_gprs = 256;
1879 rdev->config.cik.max_gs_threads = 16;
1880 rdev->config.cik.max_hw_contexts = 8;
1881
1882 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1883 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1884 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1885 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1886 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1887 break;
1888 }
1889
1890 /* Initialize HDP */
1891 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1892 WREG32((0x2c14 + j), 0x00000000);
1893 WREG32((0x2c18 + j), 0x00000000);
1894 WREG32((0x2c1c + j), 0x00000000);
1895 WREG32((0x2c20 + j), 0x00000000);
1896 WREG32((0x2c24 + j), 0x00000000);
1897 }
1898
1899 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1900
1901 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1902
1903 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1904 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1905
1906 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1907 rdev->config.cik.mem_max_burst_length_bytes = 256;
1908 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1909 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1910 if (rdev->config.cik.mem_row_size_in_kb > 4)
1911 rdev->config.cik.mem_row_size_in_kb = 4;
1912 /* XXX use MC settings? */
1913 rdev->config.cik.shader_engine_tile_size = 32;
1914 rdev->config.cik.num_gpus = 1;
1915 rdev->config.cik.multi_gpu_tile_size = 64;
1916
1917 /* fix up row size */
1918 gb_addr_config &= ~ROW_SIZE_MASK;
1919 switch (rdev->config.cik.mem_row_size_in_kb) {
1920 case 1:
1921 default:
1922 gb_addr_config |= ROW_SIZE(0);
1923 break;
1924 case 2:
1925 gb_addr_config |= ROW_SIZE(1);
1926 break;
1927 case 4:
1928 gb_addr_config |= ROW_SIZE(2);
1929 break;
1930 }
1931
1932 /* setup tiling info dword. gb_addr_config is not adequate since it does
1933 * not have bank info, so create a custom tiling dword.
1934 * bits 3:0 num_pipes
1935 * bits 7:4 num_banks
1936 * bits 11:8 group_size
1937 * bits 15:12 row_size
1938 */
1939 rdev->config.cik.tile_config = 0;
1940 switch (rdev->config.cik.num_tile_pipes) {
1941 case 1:
1942 rdev->config.cik.tile_config |= (0 << 0);
1943 break;
1944 case 2:
1945 rdev->config.cik.tile_config |= (1 << 0);
1946 break;
1947 case 4:
1948 rdev->config.cik.tile_config |= (2 << 0);
1949 break;
1950 case 8:
1951 default:
1952 /* XXX what about 12? */
1953 rdev->config.cik.tile_config |= (3 << 0);
1954 break;
1955 }
1956 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1957 rdev->config.cik.tile_config |= 1 << 4;
1958 else
1959 rdev->config.cik.tile_config |= 0 << 4;
1960 rdev->config.cik.tile_config |=
1961 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1962 rdev->config.cik.tile_config |=
1963 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
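	/*
	 * Hypothetical example of the resulting dword: a part with 4 pipes
	 * (field 2), more than 4 banks (bit 4 set), pipe interleave field 0
	 * and row size field 2 would report tile_config = 0x2012 to userspace.
	 */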
1964
1965 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1966 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1967 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1968 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1969 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1970 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1971 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1972 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1973
1974 cik_tiling_mode_table_init(rdev);
1975
1976 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1977 rdev->config.cik.max_sh_per_se,
1978 rdev->config.cik.max_backends_per_se);
1979
1980 /* set HW defaults for 3D engine */
1981 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1982
1983 WREG32(SX_DEBUG_1, 0x20);
1984
1985 WREG32(TA_CNTL_AUX, 0x00010000);
1986
1987 tmp = RREG32(SPI_CONFIG_CNTL);
1988 tmp |= 0x03000000;
1989 WREG32(SPI_CONFIG_CNTL, tmp);
1990
1991 WREG32(SQ_CONFIG, 1);
1992
1993 WREG32(DB_DEBUG, 0);
1994
1995 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1996 tmp |= 0x00000400;
1997 WREG32(DB_DEBUG2, tmp);
1998
1999 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2000 tmp |= 0x00020200;
2001 WREG32(DB_DEBUG3, tmp);
2002
2003 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2004 tmp |= 0x00018208;
2005 WREG32(CB_HW_CONTROL, tmp);
2006
2007 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2008
2009 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2010 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2011 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2012 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2013
2014 WREG32(VGT_NUM_INSTANCES, 1);
2015
2016 WREG32(CP_PERFMON_CNTL, 0);
2017
2018 WREG32(SQ_CONFIG, 0);
2019
2020 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2021 FORCE_EOV_MAX_REZ_CNT(255)));
2022
2023 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2024 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2025
2026 WREG32(VGT_GS_VERTEX_REUSE, 16);
2027 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2028
2029 tmp = RREG32(HDP_MISC_CNTL);
2030 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2031 WREG32(HDP_MISC_CNTL, tmp);
2032
2033 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2034 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2035
2036 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2037 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2038
2039 udelay(50);
2040}
2041
2042/*
2043 * GPU scratch register helper functions.
2044 */
2045/**
2046 * cik_scratch_init - setup driver info for CP scratch regs
2047 *
2048 * @rdev: radeon_device pointer
2049 *
2050 * Set up the number and offset of the CP scratch registers.
2051 * NOTE: use of CP scratch registers is a legacy interface and
2052 * is not used by default on newer asics (r6xx+). On newer asics,
2053 * memory buffers are used for fences rather than scratch regs.
2054 */
2055static void cik_scratch_init(struct radeon_device *rdev)
2056{
2057 int i;
2058
2059 rdev->scratch.num_reg = 7;
2060 rdev->scratch.reg_base = SCRATCH_REG0;
2061 for (i = 0; i < rdev->scratch.num_reg; i++) {
2062 rdev->scratch.free[i] = true;
2063 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2064 }
2065}
2066
2067/**
2068 * cik_ring_test - basic gfx ring test
2069 *
2070 * @rdev: radeon_device pointer
2071 * @ring: radeon_ring structure holding ring information
2072 *
2073 * Allocate a scratch register and write to it using the gfx ring (CIK).
2074 * Provides a basic gfx ring test to verify that the ring is working.
2075 * Used by cik_cp_gfx_resume();
2076 * Returns 0 on success, error on failure.
2077 */
2078int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2079{
2080 uint32_t scratch;
2081 uint32_t tmp = 0;
2082 unsigned i;
2083 int r;
2084
2085 r = radeon_scratch_get(rdev, &scratch);
2086 if (r) {
2087 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2088 return r;
2089 }
2090 WREG32(scratch, 0xCAFEDEAD);
2091 r = radeon_ring_lock(rdev, ring, 3);
2092 if (r) {
2093 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2094 radeon_scratch_free(rdev, scratch);
2095 return r;
2096 }
2097 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2098 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2099 radeon_ring_write(ring, 0xDEADBEEF);
2100 radeon_ring_unlock_commit(rdev, ring);
2101
2102 for (i = 0; i < rdev->usec_timeout; i++) {
2103 tmp = RREG32(scratch);
2104 if (tmp == 0xDEADBEEF)
2105 break;
2106 DRM_UDELAY(1);
2107 }
2108 if (i < rdev->usec_timeout) {
2109 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2110 } else {
2111 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2112 ring->idx, scratch, tmp);
2113 r = -EINVAL;
2114 }
2115 radeon_scratch_free(rdev, scratch);
2116 return r;
2117}
2118
2119/**
2120 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2121 *
2122 * @rdev: radeon_device pointer
2123 * @fence: radeon fence object
2124 *
2125 * Emits a fence sequence number on the gfx ring and flushes
2126 * GPU caches.
2127 */
2128void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2129 struct radeon_fence *fence)
2130{
2131 struct radeon_ring *ring = &rdev->ring[fence->ring];
2132 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2133
2134 /* EVENT_WRITE_EOP - flush caches, send int */
2135 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2136 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2137 EOP_TC_ACTION_EN |
2138 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2139 EVENT_INDEX(5)));
2140 radeon_ring_write(ring, addr & 0xfffffffc);
2141 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2142 radeon_ring_write(ring, fence->seq);
2143 radeon_ring_write(ring, 0);
2144 /* HDP flush */
2145 /* We should be using the new WAIT_REG_MEM special op packet here
2146 * but it causes the CP to hang
2147 */
2148 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2149 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2150 WRITE_DATA_DST_SEL(0)));
2151 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2152 radeon_ring_write(ring, 0);
2153 radeon_ring_write(ring, 0);
2154}
2155
2156/**
2157 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2158 *
2159 * @rdev: radeon_device pointer
2160 * @fence: radeon fence object
2161 *
2162 * Emits a fence sequence number on the compute ring and flushes
2163 * GPU caches.
2164 */
2165void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2166 struct radeon_fence *fence)
2167{
2168 struct radeon_ring *ring = &rdev->ring[fence->ring];
2169 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2170
2171 /* RELEASE_MEM - flush caches, send int */
2172 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2173 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2174 EOP_TC_ACTION_EN |
2175 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2176 EVENT_INDEX(5)));
2177 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2178 radeon_ring_write(ring, addr & 0xfffffffc);
2179 radeon_ring_write(ring, upper_32_bits(addr));
2180 radeon_ring_write(ring, fence->seq);
2181 radeon_ring_write(ring, 0);
2182 /* HDP flush */
2183 /* We should be using the new WAIT_REG_MEM special op packet here
2184 * but it causes the CP to hang
2185 */
2186 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2187 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2188 WRITE_DATA_DST_SEL(0)));
2189 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2190 radeon_ring_write(ring, 0);
2191 radeon_ring_write(ring, 0);
2192}
2193
2194void cik_semaphore_ring_emit(struct radeon_device *rdev,
2195 struct radeon_ring *ring,
2196 struct radeon_semaphore *semaphore,
2197 bool emit_wait)
2198{
2199 uint64_t addr = semaphore->gpu_addr;
2200 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2201
2202 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2203 radeon_ring_write(ring, addr & 0xffffffff);
2204 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2205}
2206
2207/*
2208 * IB stuff
2209 */
2210/**
2211 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2212 *
2213 * @rdev: radeon_device pointer
2214 * @ib: radeon indirect buffer object
2215 *
2216 * Emits a DE (drawing engine) or CE (constant engine) IB
2217 * on the gfx ring. IBs are usually generated by userspace
2218 * acceleration drivers and submitted to the kernel for
2219 * scheduling on the ring. This function schedules the IB
2220 * on the gfx ring for execution by the GPU.
2221 */
2222void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2223{
2224 struct radeon_ring *ring = &rdev->ring[ib->ring];
2225 u32 header, control = INDIRECT_BUFFER_VALID;
2226
2227 if (ib->is_const_ib) {
2228 /* set switch buffer packet before const IB */
2229 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2230 radeon_ring_write(ring, 0);
2231
2232 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2233 } else {
2234 u32 next_rptr;
2235 if (ring->rptr_save_reg) {
2236 next_rptr = ring->wptr + 3 + 4;
2237 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2238 radeon_ring_write(ring, ((ring->rptr_save_reg -
2239 PACKET3_SET_UCONFIG_REG_START) >> 2));
2240 radeon_ring_write(ring, next_rptr);
2241 } else if (rdev->wb.enabled) {
2242 next_rptr = ring->wptr + 5 + 4;
2243 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2244 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2245 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2246 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2247 radeon_ring_write(ring, next_rptr);
2248 }
2249
2250 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2251 }
2252
2253 control |= ib->length_dw |
2254 (ib->vm ? (ib->vm->id << 24) : 0);
2255
2256 radeon_ring_write(ring, header);
2257 radeon_ring_write(ring,
2258#ifdef __BIG_ENDIAN
2259 (2 << 0) |
2260#endif
2261 (ib->gpu_addr & 0xFFFFFFFC));
2262 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2263 radeon_ring_write(ring, control);
2264}
2265
2266/**
2267 * cik_ib_test - basic gfx ring IB test
2268 *
2269 * @rdev: radeon_device pointer
2270 * @ring: radeon_ring structure holding ring information
2271 *
2272 * Allocate an IB and execute it on the gfx ring (CIK).
2273 * Provides a basic gfx ring test to verify that IBs are working.
2274 * Returns 0 on success, error on failure.
2275 */
2276int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2277{
2278 struct radeon_ib ib;
2279 uint32_t scratch;
2280 uint32_t tmp = 0;
2281 unsigned i;
2282 int r;
2283
2284 r = radeon_scratch_get(rdev, &scratch);
2285 if (r) {
2286 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2287 return r;
2288 }
2289 WREG32(scratch, 0xCAFEDEAD);
2290 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2291 if (r) {
2292 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2293 return r;
2294 }
2295 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2296 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2297 ib.ptr[2] = 0xDEADBEEF;
2298 ib.length_dw = 3;
2299 r = radeon_ib_schedule(rdev, &ib, NULL);
2300 if (r) {
2301 radeon_scratch_free(rdev, scratch);
2302 radeon_ib_free(rdev, &ib);
2303 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2304 return r;
2305 }
2306 r = radeon_fence_wait(ib.fence, false);
2307 if (r) {
2308 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
2309 return r;
2310 }
2311 for (i = 0; i < rdev->usec_timeout; i++) {
2312 tmp = RREG32(scratch);
2313 if (tmp == 0xDEADBEEF)
2314 break;
2315 DRM_UDELAY(1);
2316 }
2317 if (i < rdev->usec_timeout) {
2318 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2319 } else {
2320 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2321 scratch, tmp);
2322 r = -EINVAL;
2323 }
2324 radeon_scratch_free(rdev, scratch);
2325 radeon_ib_free(rdev, &ib);
2326 return r;
2327}
2328
2329/*
2330 * CP.
2331 * On CIK, gfx and compute now have independent command processors.
2332 *
2333 * GFX
2334 * Gfx consists of a single ring and can process both gfx jobs and
2335 * compute jobs. The gfx CP consists of three microengines (ME):
2336 * PFP - Pre-Fetch Parser
2337 * ME - Micro Engine
2338 * CE - Constant Engine
2339 * The PFP and ME make up what is considered the Drawing Engine (DE).
2340 * The CE is an asynchronous engine used for updating buffer descriptors
2341 * used by the DE so that they can be loaded into cache in parallel
2342 * while the DE is processing state update packets.
2343 *
2344 * Compute
2345 * The compute CP consists of two microengines (ME):
2346 * MEC1 - Compute MicroEngine 1
2347 * MEC2 - Compute MicroEngine 2
2348 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2349 * The queues are exposed to userspace and are programmed directly
2350 * by the compute runtime.
2351 */
2352/**
2353 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2354 *
2355 * @rdev: radeon_device pointer
2356 * @enable: enable or disable the MEs
2357 *
2358 * Halts or unhalts the gfx MEs.
2359 */
2360static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2361{
2362 if (enable)
2363 WREG32(CP_ME_CNTL, 0);
2364 else {
2365 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2366 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2367 }
2368 udelay(50);
2369}
2370
2371/**
2372 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2373 *
2374 * @rdev: radeon_device pointer
2375 *
2376 * Loads the gfx PFP, ME, and CE ucode.
2377 * Returns 0 for success, -EINVAL if the ucode is not available.
2378 */
2379static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2380{
2381 const __be32 *fw_data;
2382 int i;
2383
2384 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2385 return -EINVAL;
2386
2387 cik_cp_gfx_enable(rdev, false);
2388
2389 /* PFP */
2390 fw_data = (const __be32 *)rdev->pfp_fw->data;
2391 WREG32(CP_PFP_UCODE_ADDR, 0);
2392 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2393 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2394 WREG32(CP_PFP_UCODE_ADDR, 0);
2395
2396 /* CE */
2397 fw_data = (const __be32 *)rdev->ce_fw->data;
2398 WREG32(CP_CE_UCODE_ADDR, 0);
2399 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2400 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2401 WREG32(CP_CE_UCODE_ADDR, 0);
2402
2403 /* ME */
2404 fw_data = (const __be32 *)rdev->me_fw->data;
2405 WREG32(CP_ME_RAM_WADDR, 0);
2406 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2407 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2408 WREG32(CP_ME_RAM_WADDR, 0);
2409
2410 WREG32(CP_PFP_UCODE_ADDR, 0);
2411 WREG32(CP_CE_UCODE_ADDR, 0);
2412 WREG32(CP_ME_RAM_WADDR, 0);
2413 WREG32(CP_ME_RAM_RADDR, 0);
2414 return 0;
2415}
2416
2417/**
2418 * cik_cp_gfx_start - start the gfx ring
2419 *
2420 * @rdev: radeon_device pointer
2421 *
2422 * Enables the ring and loads the clear state context and other
2423 * packets required to init the ring.
2424 * Returns 0 for success, error for failure.
2425 */
2426static int cik_cp_gfx_start(struct radeon_device *rdev)
2427{
2428 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2429 int r, i;
2430
2431 /* init the CP */
2432 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2433 WREG32(CP_ENDIAN_SWAP, 0);
2434 WREG32(CP_DEVICE_ID, 1);
2435
2436 cik_cp_gfx_enable(rdev, true);
2437
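	/*
	 * The 17 extra dwords account for the non-clear-state packets written
	 * below: SET_BASE (4) + PREAMBLE_CNTL (2) + CONTEXT_CONTROL (3) +
	 * PREAMBLE_CNTL (2) + CLEAR_STATE (2) + SET_CONTEXT_REG (4) = 17.
	 */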
2438 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2439 if (r) {
2440 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2441 return r;
2442 }
2443
2444 /* init the CE partitions. CE only used for gfx on CIK */
2445 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2446 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2447 radeon_ring_write(ring, 0xc000);
2448 radeon_ring_write(ring, 0xc000);
2449
2450 /* setup clear context state */
2451 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2452 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2453
2454 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2455 radeon_ring_write(ring, 0x80000000);
2456 radeon_ring_write(ring, 0x80000000);
2457
2458 for (i = 0; i < cik_default_size; i++)
2459 radeon_ring_write(ring, cik_default_state[i]);
2460
2461 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2462 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2463
2464 /* set clear context state */
2465 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2466 radeon_ring_write(ring, 0);
2467
2468 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2469 radeon_ring_write(ring, 0x00000316);
2470 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2471 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2472
2473 radeon_ring_unlock_commit(rdev, ring);
2474
2475 return 0;
2476}
2477
2478/**
2479 * cik_cp_gfx_fini - stop the gfx ring
2480 *
2481 * @rdev: radeon_device pointer
2482 *
2483 * Stop the gfx ring and tear down the driver ring
2484 * info.
2485 */
2486static void cik_cp_gfx_fini(struct radeon_device *rdev)
2487{
2488 cik_cp_gfx_enable(rdev, false);
2489 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2490}
2491
2492/**
2493 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2494 *
2495 * @rdev: radeon_device pointer
2496 *
2497 * Program the location and size of the gfx ring buffer
2498 * and test it to make sure it's working.
2499 * Returns 0 for success, error for failure.
2500 */
2501static int cik_cp_gfx_resume(struct radeon_device *rdev)
2502{
2503 struct radeon_ring *ring;
2504 u32 tmp;
2505 u32 rb_bufsz;
2506 u64 rb_addr;
2507 int r;
2508
2509 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2510 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2511
2512 /* Set the write pointer delay */
2513 WREG32(CP_RB_WPTR_DELAY, 0);
2514
2515 /* set the RB to use vmid 0 */
2516 WREG32(CP_RB_VMID, 0);
2517
2518 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2519
2520 /* ring 0 - compute and gfx */
2521 /* Set ring buffer size */
2522 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2523 rb_bufsz = drm_order(ring->ring_size / 8);
2524 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2525#ifdef __BIG_ENDIAN
2526 tmp |= BUF_SWAP_32BIT;
2527#endif
2528 WREG32(CP_RB0_CNTL, tmp);
2529
2530 /* Initialize the ring buffer's read and write pointers */
2531 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2532 ring->wptr = 0;
2533 WREG32(CP_RB0_WPTR, ring->wptr);
2534
2535 /* set the wb address whether it's enabled or not */
2536 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2537 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2538
2539 /* scratch register shadowing is no longer supported */
2540 WREG32(SCRATCH_UMSK, 0);
2541
2542 if (!rdev->wb.enabled)
2543 tmp |= RB_NO_UPDATE;
2544
2545 mdelay(1);
2546 WREG32(CP_RB0_CNTL, tmp);
2547
2548 rb_addr = ring->gpu_addr >> 8;
2549 WREG32(CP_RB0_BASE, rb_addr);
2550 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2551
2552 ring->rptr = RREG32(CP_RB0_RPTR);
2553
2554 /* start the ring */
2555 cik_cp_gfx_start(rdev);
2556 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2557 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2558 if (r) {
2559 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2560 return r;
2561 }
2562 return 0;
2563}
2564
2565u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2566 struct radeon_ring *ring)
2567{
2568 u32 rptr;
2569
2570
2571
2572 if (rdev->wb.enabled) {
2573 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2574 } else {
2575 mutex_lock(&rdev->srbm_mutex);
2576 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2577 rptr = RREG32(CP_HQD_PQ_RPTR);
2578 cik_srbm_select(rdev, 0, 0, 0, 0);
2579 mutex_unlock(&rdev->srbm_mutex);
2580 }
2581 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2582
2583 return rptr;
2584}
2585
2586u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2587 struct radeon_ring *ring)
2588{
2589 u32 wptr;
2590
2591 if (rdev->wb.enabled) {
2592 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2593 } else {
2594 mutex_lock(&rdev->srbm_mutex);
2595 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2596 wptr = RREG32(CP_HQD_PQ_WPTR);
2597 cik_srbm_select(rdev, 0, 0, 0, 0);
2598 mutex_unlock(&rdev->srbm_mutex);
2599 }
2600 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2601
2602 return wptr;
2603}
2604
2605void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2606 struct radeon_ring *ring)
2607{
2608 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2609
2610 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2611 WDOORBELL32(ring->doorbell_offset, wptr);
2612}
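/*
 * Note: with writeback enabled the rptr/wptr getters above never take the
 * SRBM-selected register path; cik_compute_ring_set_wptr always refreshes
 * the writeback slot and then writes the doorbell to notify the hardware
 * queue of the new write pointer.
 */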
2613
2614/**
2615 * cik_cp_compute_enable - enable/disable the compute CP MEs
2616 *
2617 * @rdev: radeon_device pointer
2618 * @enable: enable or disable the MEs
2619 *
2620 * Halts or unhalts the compute MEs.
2621 */
2622static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2623{
2624 if (enable)
2625 WREG32(CP_MEC_CNTL, 0);
2626 else
2627 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2628 udelay(50);
2629}
2630
2631/**
2632 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2633 *
2634 * @rdev: radeon_device pointer
2635 *
2636 * Loads the compute MEC1&2 ucode.
2637 * Returns 0 for success, -EINVAL if the ucode is not available.
2638 */
2639static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2640{
2641 const __be32 *fw_data;
2642 int i;
2643
2644 if (!rdev->mec_fw)
2645 return -EINVAL;
2646
2647 cik_cp_compute_enable(rdev, false);
2648
2649 /* MEC1 */
2650 fw_data = (const __be32 *)rdev->mec_fw->data;
2651 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2652 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2653 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2654 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2655
2656 if (rdev->family == CHIP_KAVERI) {
2657 /* MEC2 */
2658 fw_data = (const __be32 *)rdev->mec_fw->data;
2659 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2660 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2661 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2662 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2663 }
2664
2665 return 0;
2666}
2667
2668/**
2669 * cik_cp_compute_start - start the compute queues
2670 *
2671 * @rdev: radeon_device pointer
2672 *
2673 * Enable the compute queues.
2674 * Returns 0 for success, error for failure.
2675 */
2676static int cik_cp_compute_start(struct radeon_device *rdev)
2677{
2678 cik_cp_compute_enable(rdev, true);
2679
2680 return 0;
2681}
2682
2683/**
2684 * cik_cp_compute_fini - stop the compute queues
2685 *
2686 * @rdev: radeon_device pointer
2687 *
2688 * Stop the compute queues and tear down the driver queue
2689 * info.
2690 */
2691static void cik_cp_compute_fini(struct radeon_device *rdev)
2692{
2693 int i, idx, r;
2694
2695 cik_cp_compute_enable(rdev, false);
2696
2697 for (i = 0; i < 2; i++) {
2698 if (i == 0)
2699 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2700 else
2701 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2702
2703 if (rdev->ring[idx].mqd_obj) {
2704 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2705 if (unlikely(r != 0))
2706 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2707
2708 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2709 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2710
2711 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2712 rdev->ring[idx].mqd_obj = NULL;
2713 }
2714 }
Alex Deucher841cf442012-12-18 21:47:44 -05002715}
2716
2717static void cik_mec_fini(struct radeon_device *rdev)
2718{
2719 int r;
2720
2721 if (rdev->mec.hpd_eop_obj) {
2722 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2723 if (unlikely(r != 0))
2724 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2725 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2726 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2727
2728 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2729 rdev->mec.hpd_eop_obj = NULL;
2730 }
2731}
2732
2733#define MEC_HPD_SIZE 2048
2734
2735static int cik_mec_init(struct radeon_device *rdev)
2736{
2737 int r;
2738 u32 *hpd;
2739
2740 /*
2741 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2742 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2743 */
2744 if (rdev->family == CHIP_KAVERI)
2745 rdev->mec.num_mec = 2;
2746 else
2747 rdev->mec.num_mec = 1;
2748 rdev->mec.num_pipe = 4;
2749 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2750
2751 if (rdev->mec.hpd_eop_obj == NULL) {
2752 r = radeon_bo_create(rdev,
2753 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2754 PAGE_SIZE, true,
2755 RADEON_GEM_DOMAIN_GTT, NULL,
2756 &rdev->mec.hpd_eop_obj);
2757 if (r) {
2758 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
2759 return r;
2760 }
2761 }
2762
2763 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2764 if (unlikely(r != 0)) {
2765 cik_mec_fini(rdev);
2766 return r;
2767 }
2768 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2769 &rdev->mec.hpd_eop_gpu_addr);
2770 if (r) {
2771 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
2772 cik_mec_fini(rdev);
2773 return r;
2774 }
2775 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2776 if (r) {
2777 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
2778 cik_mec_fini(rdev);
2779 return r;
2780 }
2781
2782 /* clear memory. Not sure if this is required or not */
2783 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2784
2785 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2786 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2787
2788 return 0;
2789}
2790
2791struct hqd_registers
2792{
2793 u32 cp_mqd_base_addr;
2794 u32 cp_mqd_base_addr_hi;
2795 u32 cp_hqd_active;
2796 u32 cp_hqd_vmid;
2797 u32 cp_hqd_persistent_state;
2798 u32 cp_hqd_pipe_priority;
2799 u32 cp_hqd_queue_priority;
2800 u32 cp_hqd_quantum;
2801 u32 cp_hqd_pq_base;
2802 u32 cp_hqd_pq_base_hi;
2803 u32 cp_hqd_pq_rptr;
2804 u32 cp_hqd_pq_rptr_report_addr;
2805 u32 cp_hqd_pq_rptr_report_addr_hi;
2806 u32 cp_hqd_pq_wptr_poll_addr;
2807 u32 cp_hqd_pq_wptr_poll_addr_hi;
2808 u32 cp_hqd_pq_doorbell_control;
2809 u32 cp_hqd_pq_wptr;
2810 u32 cp_hqd_pq_control;
2811 u32 cp_hqd_ib_base_addr;
2812 u32 cp_hqd_ib_base_addr_hi;
2813 u32 cp_hqd_ib_rptr;
2814 u32 cp_hqd_ib_control;
2815 u32 cp_hqd_iq_timer;
2816 u32 cp_hqd_iq_rptr;
2817 u32 cp_hqd_dequeue_request;
2818 u32 cp_hqd_dma_offload;
2819 u32 cp_hqd_sema_cmd;
2820 u32 cp_hqd_msg_type;
2821 u32 cp_hqd_atomic0_preop_lo;
2822 u32 cp_hqd_atomic0_preop_hi;
2823 u32 cp_hqd_atomic1_preop_lo;
2824 u32 cp_hqd_atomic1_preop_hi;
2825 u32 cp_hqd_hq_scheduler0;
2826 u32 cp_hqd_hq_scheduler1;
2827 u32 cp_mqd_control;
2828};
2829
2830struct bonaire_mqd
2831{
2832 u32 header;
2833 u32 dispatch_initiator;
2834 u32 dimensions[3];
2835 u32 start_idx[3];
2836 u32 num_threads[3];
2837 u32 pipeline_stat_enable;
2838 u32 perf_counter_enable;
2839 u32 pgm[2];
2840 u32 tba[2];
2841 u32 tma[2];
2842 u32 pgm_rsrc[2];
2843 u32 vmid;
2844 u32 resource_limits;
2845 u32 static_thread_mgmt01[2];
2846 u32 tmp_ring_size;
2847 u32 static_thread_mgmt23[2];
2848 u32 restart[3];
2849 u32 thread_trace_enable;
2850 u32 reserved1;
2851 u32 user_data[16];
2852 u32 vgtcs_invoke_count[2];
2853 struct hqd_registers queue_state;
2854 u32 dequeue_cntr;
2855 u32 interrupt_queue[64];
2856};
2857
2858/**
2859 * cik_cp_compute_resume - setup the compute queue registers
2860 *
2861 * @rdev: radeon_device pointer
2862 *
2863 * Program the compute queues and test them to make sure they
2864 * are working.
2865 * Returns 0 for success, error for failure.
2866 */
2867static int cik_cp_compute_resume(struct radeon_device *rdev)
2868{
2869 int r, i, j, idx;
2870 u32 tmp;
2871 bool use_doorbell = true;
2872 u64 hqd_gpu_addr;
2873 u64 mqd_gpu_addr;
2874 u64 eop_gpu_addr;
2875 u64 wb_gpu_addr;
2876 u32 *buf;
2877 struct bonaire_mqd *mqd;
2878
2879 r = cik_cp_compute_start(rdev);
2880 if (r)
2881 return r;
2882
2883 /* fix up chicken bits */
2884 tmp = RREG32(CP_CPF_DEBUG);
2885 tmp |= (1 << 23);
2886 WREG32(CP_CPF_DEBUG, tmp);
2887
2888 /* init the pipes */
2889 mutex_lock(&rdev->srbm_mutex);
2890 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2891 int me = (i < 4) ? 1 : 2;
2892 int pipe = (i < 4) ? i : (i - 4);
2893
2894 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2895
2896 cik_srbm_select(rdev, me, pipe, 0, 0);
2897
2898 /* write the EOP addr */
2899 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2900 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2901
2902 /* set the VMID assigned */
2903 WREG32(CP_HPD_EOP_VMID, 0);
2904
2905 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2906 tmp = RREG32(CP_HPD_EOP_CONTROL);
2907 tmp &= ~EOP_SIZE_MASK;
2908 tmp |= drm_order(MEC_HPD_SIZE / 8);
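		/*
		 * e.g. MEC_HPD_SIZE / 8 = 256, drm_order(256) = 8, and
		 * 2^(8+1) = 512 dwords = 2048 bytes = MEC_HPD_SIZE
		 */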
2909 WREG32(CP_HPD_EOP_CONTROL, tmp);
2910 }
2911 cik_srbm_select(rdev, 0, 0, 0, 0);
2912 mutex_unlock(&rdev->srbm_mutex);
2913
2914 /* init the queues. Just two for now. */
2915 for (i = 0; i < 2; i++) {
2916 if (i == 0)
2917 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2918 else
2919 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2920
2921 if (rdev->ring[idx].mqd_obj == NULL) {
2922 r = radeon_bo_create(rdev,
2923 sizeof(struct bonaire_mqd),
2924 PAGE_SIZE, true,
2925 RADEON_GEM_DOMAIN_GTT, NULL,
2926 &rdev->ring[idx].mqd_obj);
2927 if (r) {
2928 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2929 return r;
2930 }
2931 }
2932
2933 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2934 if (unlikely(r != 0)) {
2935 cik_cp_compute_fini(rdev);
2936 return r;
2937 }
2938 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2939 &mqd_gpu_addr);
2940 if (r) {
2941 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2942 cik_cp_compute_fini(rdev);
2943 return r;
2944 }
2945 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2946 if (r) {
2947 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2948 cik_cp_compute_fini(rdev);
2949 return r;
2950 }
2951
2952 /* doorbell offset */
2953 rdev->ring[idx].doorbell_offset =
2954 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2955
2956 /* init the mqd struct */
2957 memset(buf, 0, sizeof(struct bonaire_mqd));
2958
2959 mqd = (struct bonaire_mqd *)buf;
2960 mqd->header = 0xC0310800;
2961 mqd->static_thread_mgmt01[0] = 0xffffffff;
2962 mqd->static_thread_mgmt01[1] = 0xffffffff;
2963 mqd->static_thread_mgmt23[0] = 0xffffffff;
2964 mqd->static_thread_mgmt23[1] = 0xffffffff;
2965
2966 mutex_lock(&rdev->srbm_mutex);
2967 cik_srbm_select(rdev, rdev->ring[idx].me,
2968 rdev->ring[idx].pipe,
2969 rdev->ring[idx].queue, 0);
2970
2971 /* disable wptr polling */
2972 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2973 tmp &= ~WPTR_POLL_EN;
2974 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2975
2976 /* enable doorbell? */
2977 mqd->queue_state.cp_hqd_pq_doorbell_control =
2978 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2979 if (use_doorbell)
2980 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2981 else
2982 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2983 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2984 mqd->queue_state.cp_hqd_pq_doorbell_control);
2985
2986 /* disable the queue if it's active */
2987 mqd->queue_state.cp_hqd_dequeue_request = 0;
2988 mqd->queue_state.cp_hqd_pq_rptr = 0;
2989 mqd->queue_state.cp_hqd_pq_wptr = 0;
2990 if (RREG32(CP_HQD_ACTIVE) & 1) {
2991 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
2992 for (j = 0; j < rdev->usec_timeout; j++) {
2993 if (!(RREG32(CP_HQD_ACTIVE) & 1))
2994 break;
2995 udelay(1);
2996 }
2997 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
2998 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
2999 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3000 }
3001
3002 /* set the pointer to the MQD */
3003 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3004 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3005 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3006 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3007 /* set MQD vmid to 0 */
3008 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3009 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3010 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3011
3012 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3013 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3014 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3015 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3016 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3017 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3018
3019 /* set up the HQD, this is similar to CP_RB0_CNTL */
3020 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3021 mqd->queue_state.cp_hqd_pq_control &=
3022 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3023
3024 mqd->queue_state.cp_hqd_pq_control |=
3025 drm_order(rdev->ring[idx].ring_size / 8);
3026 mqd->queue_state.cp_hqd_pq_control |=
3027 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3028#ifdef __BIG_ENDIAN
3029 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3030#endif
3031 mqd->queue_state.cp_hqd_pq_control &=
3032 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3033 mqd->queue_state.cp_hqd_pq_control |=
3034 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3035 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3036
3037 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3038 if (i == 0)
3039 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3040 else
3041 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3042 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3043 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3044 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3045 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3046 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3047
3048 /* set the wb address whether it's enabled or not */
3049 if (i == 0)
3050 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3051 else
3052 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3053 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3054 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3055 upper_32_bits(wb_gpu_addr) & 0xffff;
3056 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3057 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3058 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3059 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3060
3061 /* enable the doorbell if requested */
3062 if (use_doorbell) {
3063 mqd->queue_state.cp_hqd_pq_doorbell_control =
3064 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3065 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3066 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3067 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3068 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3069 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3070 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3071
3072 } else {
3073 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3074 }
3075 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3076 mqd->queue_state.cp_hqd_pq_doorbell_control);
3077
3078 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3079 rdev->ring[idx].wptr = 0;
3080 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3081 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3082 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3083 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3084
3085 /* set the vmid for the queue */
3086 mqd->queue_state.cp_hqd_vmid = 0;
3087 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3088
3089 /* activate the queue */
3090 mqd->queue_state.cp_hqd_active = 1;
3091 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3092
3093 cik_srbm_select(rdev, 0, 0, 0, 0);
3094 mutex_unlock(&rdev->srbm_mutex);
3095
3096 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3097 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3098
3099 rdev->ring[idx].ready = true;
3100 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3101 if (r)
3102 rdev->ring[idx].ready = false;
3103 }
3104
3105 return 0;
3106}
3107
3108static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3109{
3110 cik_cp_gfx_enable(rdev, enable);
3111 cik_cp_compute_enable(rdev, enable);
3112}
3113
3114static int cik_cp_load_microcode(struct radeon_device *rdev)
3115{
3116 int r;
3117
3118 r = cik_cp_gfx_load_microcode(rdev);
3119 if (r)
3120 return r;
3121 r = cik_cp_compute_load_microcode(rdev);
3122 if (r)
3123 return r;
3124
3125 return 0;
3126}
3127
3128static void cik_cp_fini(struct radeon_device *rdev)
3129{
3130 cik_cp_gfx_fini(rdev);
3131 cik_cp_compute_fini(rdev);
3132}
3133
3134static int cik_cp_resume(struct radeon_device *rdev)
3135{
3136 int r;
3137
3138 /* Reset all cp blocks */
3139 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3140 RREG32(GRBM_SOFT_RESET);
3141 mdelay(15);
3142 WREG32(GRBM_SOFT_RESET, 0);
3143 RREG32(GRBM_SOFT_RESET);
3144
3145 r = cik_cp_load_microcode(rdev);
3146 if (r)
3147 return r;
3148
3149 r = cik_cp_gfx_resume(rdev);
3150 if (r)
3151 return r;
3152 r = cik_cp_compute_resume(rdev);
3153 if (r)
3154 return r;
3155
3156 return 0;
3157}
3158
3159/*
3160 * sDMA - System DMA
3161 * Starting with CIK, the GPU has new asynchronous
3162 * DMA engines. These engines are used for compute
3163 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3164 * and each one supports 1 ring buffer used for gfx
3165 * and 2 queues used for compute.
3166 *
3167 * The programming model is very similar to the CP
3168 * (ring buffer, IBs, etc.), but sDMA has its own
3169 * packet format that is different from the PM4 format
3170 * used by the CP. sDMA supports copying data, writing
3171 * embedded data, solid fills, and a number of other
3172 * things. It also has support for tiling/detiling of
3173 * buffers.
3174 */
3175/**
3176 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3177 *
3178 * @rdev: radeon_device pointer
3179 * @ib: IB object to schedule
3180 *
3181 * Schedule an IB in the DMA ring (CIK).
3182 */
3183void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3184 struct radeon_ib *ib)
3185{
3186 struct radeon_ring *ring = &rdev->ring[ib->ring];
3187 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3188
3189 if (rdev->wb.enabled) {
3190 u32 next_rptr = ring->wptr + 5;
3191 while ((next_rptr & 7) != 4)
3192 next_rptr++;
3193 next_rptr += 4;
3194 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3195 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3196 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3197 radeon_ring_write(ring, 1); /* number of DWs to follow */
3198 radeon_ring_write(ring, next_rptr);
3199 }
3200
3201 /* IB packet must end on a 8 DW boundary */
3202 while ((ring->wptr & 7) != 4)
3203 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3204 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3205 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3206 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3207 radeon_ring_write(ring, ib->length_dw);
3208
3209}
3210
3211/**
3212 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3213 *
3214 * @rdev: radeon_device pointer
3215 * @fence: radeon fence object
3216 *
3217 * Add a DMA fence packet to the ring to write
3218 * the fence seq number and DMA trap packet to generate
3219 * an interrupt if needed (CIK).
3220 */
3221void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3222 struct radeon_fence *fence)
3223{
3224 struct radeon_ring *ring = &rdev->ring[fence->ring];
3225 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3226 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3227 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3228 u32 ref_and_mask;
3229
3230 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3231 ref_and_mask = SDMA0;
3232 else
3233 ref_and_mask = SDMA1;
3234
3235 /* write the fence */
3236 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3237 radeon_ring_write(ring, addr & 0xffffffff);
3238 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3239 radeon_ring_write(ring, fence->seq);
3240 /* generate an interrupt */
3241 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3242 /* flush HDP */
3243 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3244 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3245 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3246 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3247 radeon_ring_write(ring, ref_and_mask); /* MASK */
3248 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3249}
3250
3251/**
3252 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3253 *
3254 * @rdev: radeon_device pointer
3255 * @ring: radeon_ring structure holding ring information
3256 * @semaphore: radeon semaphore object
3257 * @emit_wait: wait or signal semaphore
3258 *
3259 * Add a DMA semaphore packet to the ring wait on or signal
3260 * other rings (CIK).
3261 */
3262void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3263 struct radeon_ring *ring,
3264 struct radeon_semaphore *semaphore,
3265 bool emit_wait)
3266{
3267 u64 addr = semaphore->gpu_addr;
3268 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3269
3270 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3271 radeon_ring_write(ring, addr & 0xfffffff8);
3272 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3273}
3274
3275/**
3276 * cik_sdma_gfx_stop - stop the gfx async dma engines
3277 *
3278 * @rdev: radeon_device pointer
3279 *
3280 * Stop the gfx async dma ring buffers (CIK).
3281 */
3282static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3283{
3284 u32 rb_cntl, reg_offset;
3285 int i;
3286
3287 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3288
3289 for (i = 0; i < 2; i++) {
3290 if (i == 0)
3291 reg_offset = SDMA0_REGISTER_OFFSET;
3292 else
3293 reg_offset = SDMA1_REGISTER_OFFSET;
3294 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3295 rb_cntl &= ~SDMA_RB_ENABLE;
3296 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3297 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3298 }
3299}
3300
3301/**
3302 * cik_sdma_rlc_stop - stop the compute async dma engines
3303 *
3304 * @rdev: radeon_device pointer
3305 *
3306 * Stop the compute async dma queues (CIK).
3307 */
3308static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3309{
3310 /* XXX todo */
3311}
3312
3313/**
3314 * cik_sdma_enable - halt or unhalt the async dma engines
3315 *
3316 * @rdev: radeon_device pointer
3317 * @enable: enable/disable the DMA MEs.
3318 *
3319 * Halt or unhalt the async dma engines (CIK).
3320 */
3321static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3322{
3323 u32 me_cntl, reg_offset;
3324 int i;
3325
3326 for (i = 0; i < 2; i++) {
3327 if (i == 0)
3328 reg_offset = SDMA0_REGISTER_OFFSET;
3329 else
3330 reg_offset = SDMA1_REGISTER_OFFSET;
3331 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3332 if (enable)
3333 me_cntl &= ~SDMA_HALT;
3334 else
3335 me_cntl |= SDMA_HALT;
3336 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3337 }
3338}
3339
3340/**
3341 * cik_sdma_gfx_resume - setup and start the async dma engines
3342 *
3343 * @rdev: radeon_device pointer
3344 *
3345 * Set up the gfx DMA ring buffers and enable them (CIK).
3346 * Returns 0 for success, error for failure.
3347 */
3348static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3349{
3350 struct radeon_ring *ring;
3351 u32 rb_cntl, ib_cntl;
3352 u32 rb_bufsz;
3353 u32 reg_offset, wb_offset;
3354 int i, r;
3355
3356 for (i = 0; i < 2; i++) {
3357 if (i == 0) {
3358 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3359 reg_offset = SDMA0_REGISTER_OFFSET;
3360 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3361 } else {
3362 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3363 reg_offset = SDMA1_REGISTER_OFFSET;
3364 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3365 }
3366
3367 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3368 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3369
3370 /* Set ring buffer size in dwords */
3371 rb_bufsz = drm_order(ring->ring_size / 4);
3372 rb_cntl = rb_bufsz << 1;
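		/* rb_bufsz is log2 of the ring size in dwords; it lands in the
		 * RB_SIZE field of SDMA0_GFX_RB_CNTL, which starts at bit 1
		 * (bit 0 is the separate SDMA_RB_ENABLE bit set later).
		 */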
3373#ifdef __BIG_ENDIAN
3374 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3375#endif
3376 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3377
3378 /* Initialize the ring buffer's read and write pointers */
3379 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3380 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3381
3382 /* set the wb address whether it's enabled or not */
3383 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3384 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3385 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3386 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3387
3388 if (rdev->wb.enabled)
3389 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3390
3391 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3392 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3393
3394 ring->wptr = 0;
3395 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3396
3397 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3398
3399 /* enable DMA RB */
3400 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3401
3402 ib_cntl = SDMA_IB_ENABLE;
3403#ifdef __BIG_ENDIAN
3404 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3405#endif
3406 /* enable DMA IBs */
3407 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3408
3409 ring->ready = true;
3410
3411 r = radeon_ring_test(rdev, ring->idx, ring);
3412 if (r) {
3413 ring->ready = false;
3414 return r;
3415 }
3416 }
3417
3418 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3419
3420 return 0;
3421}
3422
3423/**
3424 * cik_sdma_rlc_resume - setup and start the async dma engines
3425 *
3426 * @rdev: radeon_device pointer
3427 *
3428 * Set up the compute DMA queues and enable them (CIK).
3429 * Returns 0 for success, error for failure.
3430 */
3431static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3432{
3433 /* XXX todo */
3434 return 0;
3435}
3436
3437/**
3438 * cik_sdma_load_microcode - load the sDMA ME ucode
3439 *
3440 * @rdev: radeon_device pointer
3441 *
3442 * Loads the sDMA0/1 ucode.
3443 * Returns 0 for success, -EINVAL if the ucode is not available.
3444 */
3445static int cik_sdma_load_microcode(struct radeon_device *rdev)
3446{
3447 const __be32 *fw_data;
3448 int i;
3449
3450 if (!rdev->sdma_fw)
3451 return -EINVAL;
3452
3453 /* stop the gfx rings and rlc compute queues */
3454 cik_sdma_gfx_stop(rdev);
3455 cik_sdma_rlc_stop(rdev);
3456
3457 /* halt the MEs */
3458 cik_sdma_enable(rdev, false);
3459
3460 /* sdma0 */
3461 fw_data = (const __be32 *)rdev->sdma_fw->data;
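	/* UCODE_ADDR is reset to 0 and then appears to auto-increment as
	 * each dword is written to UCODE_DATA, streaming the ucode image in.
	 */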
3462 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3463 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3464 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3465 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3466
3467 /* sdma1 */
3468 fw_data = (const __be32 *)rdev->sdma_fw->data;
3469 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3470 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3471 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3472 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3473
3474 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3475 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3476 return 0;
3477}
3478
3479/**
3480 * cik_sdma_resume - setup and start the async dma engines
3481 *
3482 * @rdev: radeon_device pointer
3483 *
3484 * Set up the DMA engines and enable them (CIK).
3485 * Returns 0 for success, error for failure.
3486 */
3487static int cik_sdma_resume(struct radeon_device *rdev)
3488{
3489 int r;
3490
3491 /* Reset dma */
3492 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3493 RREG32(SRBM_SOFT_RESET);
3494 udelay(50);
3495 WREG32(SRBM_SOFT_RESET, 0);
3496 RREG32(SRBM_SOFT_RESET);
3497
3498 r = cik_sdma_load_microcode(rdev);
3499 if (r)
3500 return r;
3501
3502 /* unhalt the MEs */
3503 cik_sdma_enable(rdev, true);
3504
3505 /* start the gfx rings and rlc compute queues */
3506 r = cik_sdma_gfx_resume(rdev);
3507 if (r)
3508 return r;
3509 r = cik_sdma_rlc_resume(rdev);
3510 if (r)
3511 return r;
3512
3513 return 0;
3514}
3515
3516/**
3517 * cik_sdma_fini - tear down the async dma engines
3518 *
3519 * @rdev: radeon_device pointer
3520 *
3521 * Stop the async dma engines and free the rings (CIK).
3522 */
3523static void cik_sdma_fini(struct radeon_device *rdev)
3524{
3525 /* stop the gfx rings and rlc compute queues */
3526 cik_sdma_gfx_stop(rdev);
3527 cik_sdma_rlc_stop(rdev);
3528 /* halt the MEs */
3529 cik_sdma_enable(rdev, false);
3530 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3531 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3532 /* XXX - compute dma queue tear down */
3533}
3534
3535/**
3536 * cik_copy_dma - copy pages using the DMA engine
3537 *
3538 * @rdev: radeon_device pointer
3539 * @src_offset: src GPU address
3540 * @dst_offset: dst GPU address
3541 * @num_gpu_pages: number of GPU pages to xfer
3542 * @fence: radeon fence object
3543 *
3544 * Copy GPU pages using the DMA engine (CIK).
3545 * Used by the radeon ttm implementation to move pages if
3546 * registered as the asic copy callback.
3547 */
3548int cik_copy_dma(struct radeon_device *rdev,
3549 uint64_t src_offset, uint64_t dst_offset,
3550 unsigned num_gpu_pages,
3551 struct radeon_fence **fence)
3552{
3553 struct radeon_semaphore *sem = NULL;
3554 int ring_index = rdev->asic->copy.dma_ring_index;
3555 struct radeon_ring *ring = &rdev->ring[ring_index];
3556 u32 size_in_bytes, cur_size_in_bytes;
3557 int i, num_loops;
3558 int r = 0;
3559
3560 r = radeon_semaphore_create(rdev, &sem);
3561 if (r) {
3562 DRM_ERROR("radeon: moving bo (%d).\n", r);
3563 return r;
3564 }
3565
3566 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3567 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
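	/* each SDMA linear copy packet moves at most 0x1fffff bytes (~2 MB),
	 * so the transfer is split into num_loops chunks; 7 DW per packet
	 * plus 14 DW of headroom for the semaphore sync and fence below.
	 */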
3568 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3569 if (r) {
3570 DRM_ERROR("radeon: moving bo (%d).\n", r);
3571 radeon_semaphore_free(rdev, &sem, NULL);
3572 return r;
3573 }
3574
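	/* if the previous fence lives on a different ring, emit a semaphore
	 * wait so this copy only starts once that work has completed.
	 */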
3575 if (radeon_fence_need_sync(*fence, ring->idx)) {
3576 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3577 ring->idx);
3578 radeon_fence_note_sync(*fence, ring->idx);
3579 } else {
3580 radeon_semaphore_free(rdev, &sem, NULL);
3581 }
3582
3583 for (i = 0; i < num_loops; i++) {
3584 cur_size_in_bytes = size_in_bytes;
3585 if (cur_size_in_bytes > 0x1fffff)
3586 cur_size_in_bytes = 0x1fffff;
3587 size_in_bytes -= cur_size_in_bytes;
3588 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3589 radeon_ring_write(ring, cur_size_in_bytes);
3590 radeon_ring_write(ring, 0); /* src/dst endian swap */
3591 radeon_ring_write(ring, src_offset & 0xffffffff);
3592 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3593 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3594 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3595 src_offset += cur_size_in_bytes;
3596 dst_offset += cur_size_in_bytes;
3597 }
3598
3599 r = radeon_fence_emit(rdev, fence, ring->idx);
3600 if (r) {
3601 radeon_ring_unlock_undo(rdev, ring);
3602 return r;
3603 }
3604
3605 radeon_ring_unlock_commit(rdev, ring);
3606 radeon_semaphore_free(rdev, &sem, *fence);
3607
3608 return r;
3609}
3610
3611/**
3612 * cik_sdma_ring_test - simple async dma engine test
3613 *
3614 * @rdev: radeon_device pointer
3615 * @ring: radeon_ring structure holding ring information
3616 *
3617 * Test the DMA engine by using it to write a
3618 * value to memory (CIK).
3619 * Returns 0 for success, error for failure.
3620 */
3621int cik_sdma_ring_test(struct radeon_device *rdev,
3622 struct radeon_ring *ring)
3623{
3624 unsigned i;
3625 int r;
3626 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3627 u32 tmp;
3628
3629 if (!ptr) {
3630 DRM_ERROR("invalid vram scratch pointer\n");
3631 return -EINVAL;
3632 }
3633
3634 tmp = 0xCAFEDEAD;
3635 writel(tmp, ptr);
3636
3637 r = radeon_ring_lock(rdev, ring, 4);
3638 if (r) {
3639 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3640 return r;
3641 }
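	/* SDMA linear write: header, dst addr lo/hi, dword count, then the
	 * payload; the CPU polls the VRAM scratch page for the value below.
	 */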
3642 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3643 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3644 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3645 radeon_ring_write(ring, 1); /* number of DWs to follow */
3646 radeon_ring_write(ring, 0xDEADBEEF);
3647 radeon_ring_unlock_commit(rdev, ring);
3648
3649 for (i = 0; i < rdev->usec_timeout; i++) {
3650 tmp = readl(ptr);
3651 if (tmp == 0xDEADBEEF)
3652 break;
3653 DRM_UDELAY(1);
3654 }
3655
3656 if (i < rdev->usec_timeout) {
3657 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3658 } else {
3659 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3660 ring->idx, tmp);
3661 r = -EINVAL;
3662 }
3663 return r;
3664}
3665
3666/**
3667 * cik_sdma_ib_test - test an IB on the DMA engine
3668 *
3669 * @rdev: radeon_device pointer
3670 * @ring: radeon_ring structure holding ring information
3671 *
3672 * Test a simple IB in the DMA ring (CIK).
3673 * Returns 0 on success, error on failure.
3674 */
3675int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3676{
3677 struct radeon_ib ib;
3678 unsigned i;
3679 int r;
3680 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3681 u32 tmp = 0;
3682
3683 if (!ptr) {
3684 DRM_ERROR("invalid vram scratch pointer\n");
3685 return -EINVAL;
3686 }
3687
3688 tmp = 0xCAFEDEAD;
3689 writel(tmp, ptr);
3690
3691 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3692 if (r) {
3693 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3694 return r;
3695 }
3696
3697 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3698 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3699 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3700 ib.ptr[3] = 1;
3701 ib.ptr[4] = 0xDEADBEEF;
3702 ib.length_dw = 5;
3703
3704 r = radeon_ib_schedule(rdev, &ib, NULL);
3705 if (r) {
3706 radeon_ib_free(rdev, &ib);
3707 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3708 return r;
3709 }
3710 r = radeon_fence_wait(ib.fence, false);
3711 if (r) {
3712 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3713 return r;
3714 }
3715 for (i = 0; i < rdev->usec_timeout; i++) {
3716 tmp = readl(ptr);
3717 if (tmp == 0xDEADBEEF)
3718 break;
3719 DRM_UDELAY(1);
3720 }
3721 if (i < rdev->usec_timeout) {
3722 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3723 } else {
3724 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3725 r = -EINVAL;
3726 }
3727 radeon_ib_free(rdev, &ib);
3728 return r;
3729}
3730
3731
3732static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3733{
3734 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3735 RREG32(GRBM_STATUS));
3736 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3737 RREG32(GRBM_STATUS2));
3738 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3739 RREG32(GRBM_STATUS_SE0));
3740 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3741 RREG32(GRBM_STATUS_SE1));
3742 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3743 RREG32(GRBM_STATUS_SE2));
3744 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3745 RREG32(GRBM_STATUS_SE3));
3746 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3747 RREG32(SRBM_STATUS));
3748 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3749 RREG32(SRBM_STATUS2));
3750 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3751 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3752 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3753 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3754	dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3755 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3756 RREG32(CP_STALLED_STAT1));
3757 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3758 RREG32(CP_STALLED_STAT2));
3759 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3760 RREG32(CP_STALLED_STAT3));
3761 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3762 RREG32(CP_CPF_BUSY_STAT));
3763 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3764 RREG32(CP_CPF_STALLED_STAT1));
3765 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3766 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3767 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3768 RREG32(CP_CPC_STALLED_STAT1));
3769 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3770}
3771
3772/**
3773 * cik_gpu_check_soft_reset - check which blocks are busy
3774 *
3775 * @rdev: radeon_device pointer
3776 *
3777 * Check which blocks are busy and return the relevant reset
3778 * mask to be used by cik_gpu_soft_reset().
3779 * Returns a mask of the blocks to be reset.
3780 */
3781static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3782{
3783 u32 reset_mask = 0;
3784 u32 tmp;
3785
3786 /* GRBM_STATUS */
3787 tmp = RREG32(GRBM_STATUS);
3788 if (tmp & (PA_BUSY | SC_BUSY |
3789 BCI_BUSY | SX_BUSY |
3790 TA_BUSY | VGT_BUSY |
3791 DB_BUSY | CB_BUSY |
3792 GDS_BUSY | SPI_BUSY |
3793 IA_BUSY | IA_BUSY_NO_DMA))
3794 reset_mask |= RADEON_RESET_GFX;
3795
3796 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3797 reset_mask |= RADEON_RESET_CP;
3798
3799 /* GRBM_STATUS2 */
3800 tmp = RREG32(GRBM_STATUS2);
3801 if (tmp & RLC_BUSY)
3802 reset_mask |= RADEON_RESET_RLC;
3803
3804 /* SDMA0_STATUS_REG */
3805 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3806 if (!(tmp & SDMA_IDLE))
3807 reset_mask |= RADEON_RESET_DMA;
3808
3809 /* SDMA1_STATUS_REG */
3810 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3811 if (!(tmp & SDMA_IDLE))
3812 reset_mask |= RADEON_RESET_DMA1;
3813
3814 /* SRBM_STATUS2 */
3815 tmp = RREG32(SRBM_STATUS2);
3816 if (tmp & SDMA_BUSY)
3817 reset_mask |= RADEON_RESET_DMA;
3818
3819 if (tmp & SDMA1_BUSY)
3820 reset_mask |= RADEON_RESET_DMA1;
3821
3822 /* SRBM_STATUS */
3823 tmp = RREG32(SRBM_STATUS);
3824
3825 if (tmp & IH_BUSY)
3826 reset_mask |= RADEON_RESET_IH;
3827
3828 if (tmp & SEM_BUSY)
3829 reset_mask |= RADEON_RESET_SEM;
3830
3831 if (tmp & GRBM_RQ_PENDING)
3832 reset_mask |= RADEON_RESET_GRBM;
3833
3834 if (tmp & VMC_BUSY)
3835 reset_mask |= RADEON_RESET_VMC;
3836
3837 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3838 MCC_BUSY | MCD_BUSY))
3839 reset_mask |= RADEON_RESET_MC;
3840
3841 if (evergreen_is_display_hung(rdev))
3842 reset_mask |= RADEON_RESET_DISPLAY;
3843
3844	/* Skip MC reset as it's most likely not hung, just busy */
3845 if (reset_mask & RADEON_RESET_MC) {
3846 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3847 reset_mask &= ~RADEON_RESET_MC;
3848 }
3849
3850 return reset_mask;
3851}
3852
3853/**
3854 * cik_gpu_soft_reset - soft reset GPU
3855 *
3856 * @rdev: radeon_device pointer
3857 * @reset_mask: mask of which blocks to reset
3858 *
3859 * Soft reset the blocks specified in @reset_mask.
3860 */
3861static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3862{
3863 struct evergreen_mc_save save;
3864 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3865 u32 tmp;
3866
3867 if (reset_mask == 0)
3868 return;
3869
3870 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3871
3872 cik_print_gpu_status_regs(rdev);
3873 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3874 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3875 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3876 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3877
3878 /* stop the rlc */
3879 cik_rlc_stop(rdev);
3880
3881 /* Disable GFX parsing/prefetching */
3882 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3883
3884 /* Disable MEC parsing/prefetching */
3885 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3886
3887 if (reset_mask & RADEON_RESET_DMA) {
3888 /* sdma0 */
3889 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3890 tmp |= SDMA_HALT;
3891 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3892 }
3893 if (reset_mask & RADEON_RESET_DMA1) {
3894 /* sdma1 */
3895 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3896 tmp |= SDMA_HALT;
3897 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3898 }
3899
3900 evergreen_mc_stop(rdev, &save);
3901 if (evergreen_mc_wait_for_idle(rdev)) {
3902		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3903 }
3904
3905 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3906 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3907
3908 if (reset_mask & RADEON_RESET_CP) {
3909 grbm_soft_reset |= SOFT_RESET_CP;
3910
3911 srbm_soft_reset |= SOFT_RESET_GRBM;
3912 }
3913
3914 if (reset_mask & RADEON_RESET_DMA)
3915 srbm_soft_reset |= SOFT_RESET_SDMA;
3916
3917 if (reset_mask & RADEON_RESET_DMA1)
3918 srbm_soft_reset |= SOFT_RESET_SDMA1;
3919
3920 if (reset_mask & RADEON_RESET_DISPLAY)
3921 srbm_soft_reset |= SOFT_RESET_DC;
3922
3923 if (reset_mask & RADEON_RESET_RLC)
3924 grbm_soft_reset |= SOFT_RESET_RLC;
3925
3926 if (reset_mask & RADEON_RESET_SEM)
3927 srbm_soft_reset |= SOFT_RESET_SEM;
3928
3929 if (reset_mask & RADEON_RESET_IH)
3930 srbm_soft_reset |= SOFT_RESET_IH;
3931
3932 if (reset_mask & RADEON_RESET_GRBM)
3933 srbm_soft_reset |= SOFT_RESET_GRBM;
3934
3935 if (reset_mask & RADEON_RESET_VMC)
3936 srbm_soft_reset |= SOFT_RESET_VMC;
3937
3938 if (!(rdev->flags & RADEON_IS_IGP)) {
3939 if (reset_mask & RADEON_RESET_MC)
3940 srbm_soft_reset |= SOFT_RESET_MC;
3941 }
3942
3943 if (grbm_soft_reset) {
3944 tmp = RREG32(GRBM_SOFT_RESET);
3945 tmp |= grbm_soft_reset;
3946 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3947 WREG32(GRBM_SOFT_RESET, tmp);
3948 tmp = RREG32(GRBM_SOFT_RESET);
3949
3950 udelay(50);
3951
3952 tmp &= ~grbm_soft_reset;
3953 WREG32(GRBM_SOFT_RESET, tmp);
3954 tmp = RREG32(GRBM_SOFT_RESET);
3955 }
3956
3957 if (srbm_soft_reset) {
3958 tmp = RREG32(SRBM_SOFT_RESET);
3959 tmp |= srbm_soft_reset;
3960 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3961 WREG32(SRBM_SOFT_RESET, tmp);
3962 tmp = RREG32(SRBM_SOFT_RESET);
3963
3964 udelay(50);
3965
3966 tmp &= ~srbm_soft_reset;
3967 WREG32(SRBM_SOFT_RESET, tmp);
3968 tmp = RREG32(SRBM_SOFT_RESET);
3969 }
3970
3971 /* Wait a little for things to settle down */
3972 udelay(50);
3973
3974 evergreen_mc_resume(rdev, &save);
3975 udelay(50);
3976
3977 cik_print_gpu_status_regs(rdev);
3978}
3979
3980/**
3981 * cik_asic_reset - soft reset GPU
3982 *
3983 * @rdev: radeon_device pointer
3984 *
3985 * Look up which blocks are hung and attempt
3986 * to reset them.
3987 * Returns 0 for success.
3988 */
3989int cik_asic_reset(struct radeon_device *rdev)
3990{
3991 u32 reset_mask;
3992
3993 reset_mask = cik_gpu_check_soft_reset(rdev);
3994
3995 if (reset_mask)
3996 r600_set_bios_scratch_engine_hung(rdev, true);
3997
3998 cik_gpu_soft_reset(rdev, reset_mask);
3999
4000 reset_mask = cik_gpu_check_soft_reset(rdev);
4001
4002 if (!reset_mask)
4003 r600_set_bios_scratch_engine_hung(rdev, false);
4004
4005 return 0;
4006}
4007
4008/**
4009 * cik_gfx_is_lockup - check if the 3D engine is locked up
4010 *
4011 * @rdev: radeon_device pointer
4012 * @ring: radeon_ring structure holding ring information
4013 *
4014 * Check if the 3D engine is locked up (CIK).
4015 * Returns true if the engine is locked, false if not.
4016 */
4017bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4018{
4019	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4020
4021	if (!(reset_mask & (RADEON_RESET_GFX |
4022			    RADEON_RESET_COMPUTE |
4023			    RADEON_RESET_CP))) {
4024		radeon_ring_lockup_update(ring);
4025 return false;
4026 }
4027 /* force CP activities */
4028 radeon_ring_force_activity(rdev, ring);
4029 return radeon_ring_test_lockup(rdev, ring);
4030}
4031
4032/**
4033 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4034 *
4035 * @rdev: radeon_device pointer
4036 * @ring: radeon_ring structure holding ring information
4037 *
4038 * Check if the async DMA engine is locked up (CIK).
4039 * Returns true if the engine appears to be locked up, false if not.
4040 */
4041bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4042{
4043	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4044	u32 mask;
4045
4046	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4047		mask = RADEON_RESET_DMA;
4048	else
4049		mask = RADEON_RESET_DMA1;
4050
4051	if (!(reset_mask & mask)) {
4052		radeon_ring_lockup_update(ring);
4053 return false;
4054 }
4055 /* force ring activities */
4056 radeon_ring_force_activity(rdev, ring);
4057 return radeon_ring_test_lockup(rdev, ring);
4058}
4059
4060/* MC */
4061/**
4062 * cik_mc_program - program the GPU memory controller
4063 *
4064 * @rdev: radeon_device pointer
4065 *
4066 * Set the location of vram, gart, and AGP in the GPU's
4067 * physical address space (CIK).
4068 */
4069static void cik_mc_program(struct radeon_device *rdev)
4070{
4071 struct evergreen_mc_save save;
4072 u32 tmp;
4073 int i, j;
4074
4075 /* Initialize HDP */
4076 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4077 WREG32((0x2c14 + j), 0x00000000);
4078 WREG32((0x2c18 + j), 0x00000000);
4079 WREG32((0x2c1c + j), 0x00000000);
4080 WREG32((0x2c20 + j), 0x00000000);
4081 WREG32((0x2c24 + j), 0x00000000);
4082 }
4083 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4084
4085 evergreen_mc_stop(rdev, &save);
4086 if (radeon_mc_wait_for_idle(rdev)) {
4087 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4088 }
4089 /* Lockout access through VGA aperture*/
4090 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4091 /* Update configuration */
4092 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4093 rdev->mc.vram_start >> 12);
4094 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4095 rdev->mc.vram_end >> 12);
4096 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4097 rdev->vram_scratch.gpu_addr >> 12);
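	/* MC_VM_FB_LOCATION packs the VRAM top (upper 16 bits) and base
	 * (lower 16 bits) in 16 MB units (address >> 24).
	 */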
4098 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4099 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4100 WREG32(MC_VM_FB_LOCATION, tmp);
4101 /* XXX double check these! */
4102 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4103 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4104 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4105 WREG32(MC_VM_AGP_BASE, 0);
4106 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4107 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4108 if (radeon_mc_wait_for_idle(rdev)) {
4109		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4110 }
4111 evergreen_mc_resume(rdev, &save);
4112 /* we need to own VRAM, so turn off the VGA renderer here
4113 * to stop it overwriting our objects */
4114 rv515_vga_render_disable(rdev);
4115}
4116
4117/**
4118 * cik_mc_init - initialize the memory controller driver params
4119 *
4120 * @rdev: radeon_device pointer
4121 *
4122 * Look up the amount of vram, vram width, and decide how to place
4123 * vram and gart within the GPU's physical address space (CIK).
4124 * Returns 0 for success.
4125 */
4126static int cik_mc_init(struct radeon_device *rdev)
4127{
4128 u32 tmp;
4129 int chansize, numchan;
4130
4131	/* Get VRAM information */
4132 rdev->mc.vram_is_ddr = true;
4133 tmp = RREG32(MC_ARB_RAMCFG);
4134 if (tmp & CHANSIZE_MASK) {
4135 chansize = 64;
4136 } else {
4137 chansize = 32;
4138 }
4139 tmp = RREG32(MC_SHARED_CHMAP);
4140 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4141 case 0:
4142 default:
4143 numchan = 1;
4144 break;
4145 case 1:
4146 numchan = 2;
4147 break;
4148 case 2:
4149 numchan = 4;
4150 break;
4151 case 3:
4152 numchan = 8;
4153 break;
4154 case 4:
4155 numchan = 3;
4156 break;
4157 case 5:
4158 numchan = 6;
4159 break;
4160 case 6:
4161 numchan = 10;
4162 break;
4163 case 7:
4164 numchan = 12;
4165 break;
4166 case 8:
4167 numchan = 16;
4168 break;
4169 }
4170 rdev->mc.vram_width = numchan * chansize;
4171	/* Could the aperture size report 0? */
4172 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4173 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4174	/* CONFIG_MEMSIZE reports the size in MB, as on SI */
4175 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4176 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4177 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4178 si_vram_gtt_location(rdev, &rdev->mc);
4179 radeon_update_bandwidth_info(rdev);
4180
4181 return 0;
4182}
4183
4184/*
4185 * GART
4186 * VMID 0 is the physical GPU addresses as used by the kernel.
4187 * VMIDs 1-15 are used for userspace clients and are handled
4188 * by the radeon vm/hsa code.
4189 */
4190/**
4191 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4192 *
4193 * @rdev: radeon_device pointer
4194 *
4195 * Flush the TLB for the VMID 0 page table (CIK).
4196 */
4197void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4198{
4199 /* flush hdp cache */
4200 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4201
4202 /* bits 0-15 are the VM contexts0-15 */
4203 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4204}
4205
4206/**
4207 * cik_pcie_gart_enable - gart enable
4208 *
4209 * @rdev: radeon_device pointer
4210 *
4211 * This sets up the TLBs, programs the page tables for VMID0,
4212 * sets up the hw for VMIDs 1-15 which are allocated on
4213 * demand, and sets up the global locations for the LDS, GDS,
4214 * and GPUVM for FSA64 clients (CIK).
4215 * Returns 0 for success, errors for failure.
4216 */
4217static int cik_pcie_gart_enable(struct radeon_device *rdev)
4218{
4219 int r, i;
4220
4221 if (rdev->gart.robj == NULL) {
4222 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4223 return -EINVAL;
4224 }
4225 r = radeon_gart_table_vram_pin(rdev);
4226 if (r)
4227 return r;
4228 radeon_gart_restore(rdev);
4229 /* Setup TLB control */
4230 WREG32(MC_VM_MX_L1_TLB_CNTL,
4231 (0xA << 7) |
4232 ENABLE_L1_TLB |
4233 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4234 ENABLE_ADVANCED_DRIVER_MODEL |
4235 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4236 /* Setup L2 cache */
4237 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4238 ENABLE_L2_FRAGMENT_PROCESSING |
4239 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4240 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4241 EFFECTIVE_L2_QUEUE_SIZE(7) |
4242 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4243 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4244 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4245 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4246 /* setup context0 */
4247 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4248 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4249 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4250 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4251 (u32)(rdev->dummy_page.addr >> 12));
4252 WREG32(VM_CONTEXT0_CNTL2, 0);
4253 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4254 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4255
4256 WREG32(0x15D4, 0);
4257 WREG32(0x15D8, 0);
4258 WREG32(0x15DC, 0);
4259
4260 /* empty context1-15 */
4261 /* FIXME start with 4G, once using 2 level pt switch to full
4262 * vm size space
4263 */
4264 /* set vm size, must be a multiple of 4 */
4265 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4266 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4267 for (i = 1; i < 16; i++) {
4268 if (i < 8)
4269 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4270 rdev->gart.table_addr >> 12);
4271 else
4272 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4273 rdev->gart.table_addr >> 12);
4274 }
4275
4276 /* enable context1-15 */
4277 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4278 (u32)(rdev->dummy_page.addr >> 12));
4279	WREG32(VM_CONTEXT1_CNTL2, 4);
4280	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4281				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4282 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4283 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4284 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4285 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4286 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4287 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4288 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4289 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4290 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4291 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4292 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4293
4294 /* TC cache setup ??? */
4295 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4296 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4297 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4298
4299 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4300 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4301 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4302 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4303 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4304
4305 WREG32(TC_CFG_L1_VOLATILE, 0);
4306 WREG32(TC_CFG_L2_VOLATILE, 0);
4307
4308 if (rdev->family == CHIP_KAVERI) {
4309 u32 tmp = RREG32(CHUB_CONTROL);
4310 tmp &= ~BYPASS_VM;
4311 WREG32(CHUB_CONTROL, tmp);
4312 }
4313
4314 /* XXX SH_MEM regs */
4315 /* where to put LDS, scratch, GPUVM in FSA64 space */
4316	mutex_lock(&rdev->srbm_mutex);
4317	for (i = 0; i < 16; i++) {
4318		cik_srbm_select(rdev, 0, 0, 0, i);
4319		/* CP and shaders */
4320		WREG32(SH_MEM_CONFIG, 0);
4321 WREG32(SH_MEM_APE1_BASE, 1);
4322 WREG32(SH_MEM_APE1_LIMIT, 0);
4323 WREG32(SH_MEM_BASES, 0);
4324		/* SDMA GFX */
4325 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4326 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4327 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4328 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4329 /* XXX SDMA RLC - todo */
4330	}
4331	cik_srbm_select(rdev, 0, 0, 0, 0);
4332	mutex_unlock(&rdev->srbm_mutex);
4333
4334 cik_pcie_gart_tlb_flush(rdev);
4335 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4336 (unsigned)(rdev->mc.gtt_size >> 20),
4337 (unsigned long long)rdev->gart.table_addr);
4338 rdev->gart.ready = true;
4339 return 0;
4340}
4341
4342/**
4343 * cik_pcie_gart_disable - gart disable
4344 *
4345 * @rdev: radeon_device pointer
4346 *
4347 * This disables all VM page tables (CIK).
4348 */
4349static void cik_pcie_gart_disable(struct radeon_device *rdev)
4350{
4351 /* Disable all tables */
4352 WREG32(VM_CONTEXT0_CNTL, 0);
4353 WREG32(VM_CONTEXT1_CNTL, 0);
4354 /* Setup TLB control */
4355 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4356 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4357 /* Setup L2 cache */
4358 WREG32(VM_L2_CNTL,
4359 ENABLE_L2_FRAGMENT_PROCESSING |
4360 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4361 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4362 EFFECTIVE_L2_QUEUE_SIZE(7) |
4363 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4364 WREG32(VM_L2_CNTL2, 0);
4365 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4366 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4367 radeon_gart_table_vram_unpin(rdev);
4368}
4369
4370/**
4371 * cik_pcie_gart_fini - vm fini callback
4372 *
4373 * @rdev: radeon_device pointer
4374 *
4375 * Tears down the driver GART/VM setup (CIK).
4376 */
4377static void cik_pcie_gart_fini(struct radeon_device *rdev)
4378{
4379 cik_pcie_gart_disable(rdev);
4380 radeon_gart_table_vram_free(rdev);
4381 radeon_gart_fini(rdev);
4382}
4383
4384/* vm parser */
4385/**
4386 * cik_ib_parse - vm ib_parse callback
4387 *
4388 * @rdev: radeon_device pointer
4389 * @ib: indirect buffer pointer
4390 *
4391 * CIK uses hw IB checking so this is a nop (CIK).
4392 */
4393int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4394{
4395 return 0;
4396}
4397
4398/*
4399 * vm
4400 * VMID 0 is the physical GPU addresses as used by the kernel.
4401 * VMIDs 1-15 are used for userspace clients and are handled
4402 * by the radeon vm/hsa code.
4403 */
4404/**
4405 * cik_vm_init - cik vm init callback
4406 *
4407 * @rdev: radeon_device pointer
4408 *
4409 * Inits cik specific vm parameters (number of VMs, base of vram for
4410 * VMIDs 1-15) (CIK).
4411 * Returns 0 for success.
4412 */
4413int cik_vm_init(struct radeon_device *rdev)
4414{
4415 /* number of VMs */
4416 rdev->vm_manager.nvm = 16;
4417 /* base offset of vram pages */
4418 if (rdev->flags & RADEON_IS_IGP) {
4419 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4420 tmp <<= 22;
4421 rdev->vm_manager.vram_base_offset = tmp;
4422 } else
4423 rdev->vm_manager.vram_base_offset = 0;
4424
4425 return 0;
4426}
4427
4428/**
4429 * cik_vm_fini - cik vm fini callback
4430 *
4431 * @rdev: radeon_device pointer
4432 *
4433 * Tear down any asic specific VM setup (CIK).
4434 */
4435void cik_vm_fini(struct radeon_device *rdev)
4436{
4437}
4438
4439/**
4440 * cik_vm_decode_fault - print human readable fault info
4441 *
4442 * @rdev: radeon_device pointer
4443 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4444 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
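 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value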
4445 *
4446 * Print human readable fault information (CIK).
4447 */
4448static void cik_vm_decode_fault(struct radeon_device *rdev,
4449 u32 status, u32 addr, u32 mc_client)
4450{
4451 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4452 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4453 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4454 char *block = (char *)&mc_client;
4455
4456 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4457 protections, vmid, addr,
4458 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4459 block, mc_id);
4460}
4461
4462/**
4463 * cik_vm_flush - cik vm flush using the CP
4464 *
4465 * @rdev: radeon_device pointer
4466 *
4467 * Update the page table base and flush the VM TLB
4468 * using the CP (CIK).
4469 */
4470void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4471{
4472 struct radeon_ring *ring = &rdev->ring[ridx];
4473
4474 if (vm == NULL)
4475 return;
4476
4477 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4478 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4479 WRITE_DATA_DST_SEL(0)));
4480 if (vm->id < 8) {
4481 radeon_ring_write(ring,
4482 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4483 } else {
4484 radeon_ring_write(ring,
4485 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4486 }
4487 radeon_ring_write(ring, 0);
4488 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4489
4490 /* update SH_MEM_* regs */
4491 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4492 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4493 WRITE_DATA_DST_SEL(0)));
4494 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4495 radeon_ring_write(ring, 0);
4496 radeon_ring_write(ring, VMID(vm->id));
4497
4498 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4499 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4500 WRITE_DATA_DST_SEL(0)));
4501 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4502 radeon_ring_write(ring, 0);
4503
4504 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4505 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4506 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4507 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4508
4509 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4510 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4511 WRITE_DATA_DST_SEL(0)));
4512 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4513 radeon_ring_write(ring, 0);
4514 radeon_ring_write(ring, VMID(0));
4515
4516 /* HDP flush */
4517 /* We should be using the WAIT_REG_MEM packet here like in
4518 * cik_fence_ring_emit(), but it causes the CP to hang in this
4519 * context...
4520 */
4521 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4522 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4523 WRITE_DATA_DST_SEL(0)));
4524 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4525 radeon_ring_write(ring, 0);
4526 radeon_ring_write(ring, 0);
4527
4528 /* bits 0-15 are the VM contexts0-15 */
4529 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4530 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4531 WRITE_DATA_DST_SEL(0)));
4532 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4533 radeon_ring_write(ring, 0);
4534 radeon_ring_write(ring, 1 << vm->id);
4535
4536	/* compute doesn't have PFP */
4537 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4538 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4539 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4540 radeon_ring_write(ring, 0x0);
4541 }
4542}
4543
4544/**
4545 * cik_vm_set_page - update the page tables using sDMA
4546 *
4547 * @rdev: radeon_device pointer
4548 * @ib: indirect buffer to fill with commands
4549 * @pe: addr of the page entry
4550 * @addr: dst addr to write into pe
4551 * @count: number of page entries to update
4552 * @incr: increase next addr by incr bytes
4553 * @flags: access flags
4554 *
4555 * Update the page tables using CP or sDMA (CIK).
4556 */
4557void cik_vm_set_page(struct radeon_device *rdev,
4558 struct radeon_ib *ib,
4559 uint64_t pe,
4560 uint64_t addr, unsigned count,
4561 uint32_t incr, uint32_t flags)
4562{
4563 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4564 uint64_t value;
4565 unsigned ndw;
4566
4567 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4568 /* CP */
4569 while (count) {
4570 ndw = 2 + count * 2;
4571 if (ndw > 0x3FFE)
4572 ndw = 0x3FFE;
4573
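			/* WRITE_DATA packet: control dword, dst addr lo/hi, then
			 * two dwords per page table entry; ndw caps a single
			 * packet at 0x3FFE dwords (roughly 8K entries).
			 */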
4574 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4575 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4576 WRITE_DATA_DST_SEL(1));
4577 ib->ptr[ib->length_dw++] = pe;
4578 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4579 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4580 if (flags & RADEON_VM_PAGE_SYSTEM) {
4581 value = radeon_vm_map_gart(rdev, addr);
4582 value &= 0xFFFFFFFFFFFFF000ULL;
4583 } else if (flags & RADEON_VM_PAGE_VALID) {
4584 value = addr;
4585 } else {
4586 value = 0;
4587 }
4588 addr += incr;
4589 value |= r600_flags;
4590 ib->ptr[ib->length_dw++] = value;
4591 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4592 }
4593 }
4594 } else {
4595 /* DMA */
4596 if (flags & RADEON_VM_PAGE_SYSTEM) {
4597 while (count) {
4598 ndw = count * 2;
4599 if (ndw > 0xFFFFE)
4600 ndw = 0xFFFFE;
4601
4602 /* for non-physically contiguous pages (system) */
4603 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4604 ib->ptr[ib->length_dw++] = pe;
4605 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4606 ib->ptr[ib->length_dw++] = ndw;
4607 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4608 if (flags & RADEON_VM_PAGE_SYSTEM) {
4609 value = radeon_vm_map_gart(rdev, addr);
4610 value &= 0xFFFFFFFFFFFFF000ULL;
4611 } else if (flags & RADEON_VM_PAGE_VALID) {
4612 value = addr;
4613 } else {
4614 value = 0;
4615 }
4616 addr += incr;
4617 value |= r600_flags;
4618 ib->ptr[ib->length_dw++] = value;
4619 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4620 }
4621 }
4622 } else {
4623 while (count) {
4624 ndw = count;
4625 if (ndw > 0x7FFFF)
4626 ndw = 0x7FFFF;
4627
4628 if (flags & RADEON_VM_PAGE_VALID)
4629 value = addr;
4630 else
4631 value = 0;
4632 /* for physically contiguous pages (vram) */
4633 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4634 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4635 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4636 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4637 ib->ptr[ib->length_dw++] = 0;
4638 ib->ptr[ib->length_dw++] = value; /* value */
4639 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4640 ib->ptr[ib->length_dw++] = incr; /* increment size */
4641 ib->ptr[ib->length_dw++] = 0;
4642 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4643 pe += ndw * 8;
4644 addr += ndw * incr;
4645 count -= ndw;
4646 }
4647 }
4648 while (ib->length_dw & 0x7)
4649 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4650 }
4651}
4652
4653/**
4654 * cik_dma_vm_flush - cik vm flush using sDMA
4655 *
4656 * @rdev: radeon_device pointer
4657 *
4658 * Update the page table base and flush the VM TLB
4659 * using sDMA (CIK).
4660 */
4661void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4662{
4663 struct radeon_ring *ring = &rdev->ring[ridx];
4664 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4665 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4666 u32 ref_and_mask;
4667
4668 if (vm == NULL)
4669 return;
4670
4671 if (ridx == R600_RING_TYPE_DMA_INDEX)
4672 ref_and_mask = SDMA0;
4673 else
4674 ref_and_mask = SDMA1;
4675
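	/* SRBM_WRITE packets: DW1 is the register dword offset, DW2 the
	 * value; the 0xf000 extra bits presumably set the byte-enable so
	 * the whole register is written.
	 */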
4676 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4677 if (vm->id < 8) {
4678 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4679 } else {
4680 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4681 }
4682 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4683
4684 /* update SH_MEM_* regs */
4685 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4686 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4687 radeon_ring_write(ring, VMID(vm->id));
4688
4689 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4690 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4691 radeon_ring_write(ring, 0);
4692
4693 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4694 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4695 radeon_ring_write(ring, 0);
4696
4697 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4698 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4699 radeon_ring_write(ring, 1);
4700
4701 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4702 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4703 radeon_ring_write(ring, 0);
4704
4705 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4706 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4707 radeon_ring_write(ring, VMID(0));
4708
4709 /* flush HDP */
4710 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4711 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4712 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4713 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4714 radeon_ring_write(ring, ref_and_mask); /* MASK */
4715 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4716
4717 /* flush TLB */
4718 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4719 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4720 radeon_ring_write(ring, 1 << vm->id);
4721}
4722
4723/*
4724 * RLC
4725 * The RLC is a multi-purpose microengine that handles a
4726 * variety of functions, the most important of which is
4727 * the interrupt controller.
4728 */
4729/**
4730 * cik_rlc_stop - stop the RLC ME
4731 *
4732 * @rdev: radeon_device pointer
4733 *
4734 * Halt the RLC ME (MicroEngine) (CIK).
4735 */
4736static void cik_rlc_stop(struct radeon_device *rdev)
4737{
4738 int i, j, k;
4739 u32 mask, tmp;
4740
4741 tmp = RREG32(CP_INT_CNTL_RING0);
4742 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4743 WREG32(CP_INT_CNTL_RING0, tmp);
4744
4745 RREG32(CB_CGTT_SCLK_CTRL);
4746 RREG32(CB_CGTT_SCLK_CTRL);
4747 RREG32(CB_CGTT_SCLK_CTRL);
4748 RREG32(CB_CGTT_SCLK_CTRL);
4749
4750 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4751 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4752
4753 WREG32(RLC_CNTL, 0);
4754
4755 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4756 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4757 cik_select_se_sh(rdev, i, j);
4758 for (k = 0; k < rdev->usec_timeout; k++) {
4759 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4760 break;
4761 udelay(1);
4762 }
4763 }
4764 }
4765 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4766
4767 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4768 for (k = 0; k < rdev->usec_timeout; k++) {
4769 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4770 break;
4771 udelay(1);
4772 }
4773}
4774
4775/**
4776 * cik_rlc_start - start the RLC ME
4777 *
4778 * @rdev: radeon_device pointer
4779 *
4780 * Unhalt the RLC ME (MicroEngine) (CIK).
4781 */
4782static void cik_rlc_start(struct radeon_device *rdev)
4783{
4784 u32 tmp;
4785
4786 WREG32(RLC_CNTL, RLC_ENABLE);
4787
4788 tmp = RREG32(CP_INT_CNTL_RING0);
4789 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4790 WREG32(CP_INT_CNTL_RING0, tmp);
4791
4792 udelay(50);
4793}
4794
4795/**
4796 * cik_rlc_resume - setup the RLC hw
4797 *
4798 * @rdev: radeon_device pointer
4799 *
4800 * Initialize the RLC registers, load the ucode,
4801 * and start the RLC (CIK).
4802 * Returns 0 for success, -EINVAL if the ucode is not available.
4803 */
4804static int cik_rlc_resume(struct radeon_device *rdev)
4805{
4806 u32 i, size;
4807 u32 clear_state_info[3];
4808 const __be32 *fw_data;
4809
4810 if (!rdev->rlc_fw)
4811 return -EINVAL;
4812
4813 switch (rdev->family) {
4814 case CHIP_BONAIRE:
4815 default:
4816 size = BONAIRE_RLC_UCODE_SIZE;
4817 break;
4818 case CHIP_KAVERI:
4819 size = KV_RLC_UCODE_SIZE;
4820 break;
4821 case CHIP_KABINI:
4822 size = KB_RLC_UCODE_SIZE;
4823 break;
4824 }
4825
4826 cik_rlc_stop(rdev);
4827
4828 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4829 RREG32(GRBM_SOFT_RESET);
4830 udelay(50);
4831 WREG32(GRBM_SOFT_RESET, 0);
4832 RREG32(GRBM_SOFT_RESET);
4833 udelay(50);
4834
4835 WREG32(RLC_LB_CNTR_INIT, 0);
4836 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4837
4838 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4839 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4840 WREG32(RLC_LB_PARAMS, 0x00600408);
4841 WREG32(RLC_LB_CNTL, 0x80000004);
4842
4843 WREG32(RLC_MC_CNTL, 0);
4844 WREG32(RLC_UCODE_CNTL, 0);
4845
4846 fw_data = (const __be32 *)rdev->rlc_fw->data;
4847 WREG32(RLC_GPM_UCODE_ADDR, 0);
4848 for (i = 0; i < size; i++)
4849 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4850 WREG32(RLC_GPM_UCODE_ADDR, 0);
4851
4852 /* XXX */
4853 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4854 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4855 clear_state_info[2] = 0;//cik_default_size;
4856 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4857 for (i = 0; i < 3; i++)
4858 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4859 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4860
4861 cik_rlc_start(rdev);
4862
4863 return 0;
4864}
4865
4866/*
4867 * Interrupts
4868 * Starting with r6xx, interrupts are handled via a ring buffer.
4869 * Ring buffers are areas of GPU accessible memory that the GPU
4870 * writes interrupt vectors into and the host reads vectors out of.
4871 * There is a rptr (read pointer) that determines where the
4872 * host is currently reading, and a wptr (write pointer)
4873 * which determines where the GPU has written. When the
4874 * pointers are equal, the ring is idle. When the GPU
4875 * writes vectors to the ring buffer, it increments the
4876 * wptr. When there is an interrupt, the host then starts
4877 * fetching vectors and processing them until the pointers are
4878 * equal again at which point it updates the rptr.
4879 */
4880
4881/**
4882 * cik_enable_interrupts - Enable the interrupt ring buffer
4883 *
4884 * @rdev: radeon_device pointer
4885 *
4886 * Enable the interrupt ring buffer (CIK).
4887 */
4888static void cik_enable_interrupts(struct radeon_device *rdev)
4889{
4890 u32 ih_cntl = RREG32(IH_CNTL);
4891 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4892
4893 ih_cntl |= ENABLE_INTR;
4894 ih_rb_cntl |= IH_RB_ENABLE;
4895 WREG32(IH_CNTL, ih_cntl);
4896 WREG32(IH_RB_CNTL, ih_rb_cntl);
4897 rdev->ih.enabled = true;
4898}
4899
4900/**
4901 * cik_disable_interrupts - Disable the interrupt ring buffer
4902 *
4903 * @rdev: radeon_device pointer
4904 *
4905 * Disable the interrupt ring buffer (CIK).
4906 */
4907static void cik_disable_interrupts(struct radeon_device *rdev)
4908{
4909 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4910 u32 ih_cntl = RREG32(IH_CNTL);
4911
4912 ih_rb_cntl &= ~IH_RB_ENABLE;
4913 ih_cntl &= ~ENABLE_INTR;
4914 WREG32(IH_RB_CNTL, ih_rb_cntl);
4915 WREG32(IH_CNTL, ih_cntl);
4916 /* set rptr, wptr to 0 */
4917 WREG32(IH_RB_RPTR, 0);
4918 WREG32(IH_RB_WPTR, 0);
4919 rdev->ih.enabled = false;
4920 rdev->ih.rptr = 0;
4921}
4922
4923/**
4924 * cik_disable_interrupt_state - Disable all interrupt sources
4925 *
4926 * @rdev: radeon_device pointer
4927 *
4928 * Clear all interrupt enable bits used by the driver (CIK).
4929 */
4930static void cik_disable_interrupt_state(struct radeon_device *rdev)
4931{
4932 u32 tmp;
4933
4934 /* gfx ring */
4935 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4936	/* sdma */
4937 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4938 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4939 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4940 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4941	/* compute queues */
4942 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4943 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4944 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4945 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4946 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4947 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4948 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4949 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4950 /* grbm */
4951 WREG32(GRBM_INT_CNTL, 0);
4952 /* vline/vblank, etc. */
4953 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4954 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4955 if (rdev->num_crtc >= 4) {
4956 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4957 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4958 }
4959 if (rdev->num_crtc >= 6) {
4960 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4961 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4962 }
4963
4964 /* dac hotplug */
4965 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4966
4967 /* digital hotplug */
4968 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4969 WREG32(DC_HPD1_INT_CONTROL, tmp);
4970 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4971 WREG32(DC_HPD2_INT_CONTROL, tmp);
4972 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4973 WREG32(DC_HPD3_INT_CONTROL, tmp);
4974 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4975 WREG32(DC_HPD4_INT_CONTROL, tmp);
4976 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4977 WREG32(DC_HPD5_INT_CONTROL, tmp);
4978 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4979 WREG32(DC_HPD6_INT_CONTROL, tmp);
4980
4981}
4982
4983/**
4984 * cik_irq_init - init and enable the interrupt ring
4985 *
4986 * @rdev: radeon_device pointer
4987 *
4988 * Allocate a ring buffer for the interrupt controller,
4989 * enable the RLC, disable interrupts, set up the IH
4990 * ring buffer and enable it (CIK).
4991 * Called at device load and resume.
4992 * Returns 0 for success, errors for failure.
4993 */
4994static int cik_irq_init(struct radeon_device *rdev)
4995{
4996 int ret = 0;
4997 int rb_bufsz;
4998 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4999
5000 /* allocate ring */
5001 ret = r600_ih_ring_alloc(rdev);
5002 if (ret)
5003 return ret;
5004
5005 /* disable irqs */
5006 cik_disable_interrupts(rdev);
5007
5008 /* init rlc */
5009 ret = cik_rlc_resume(rdev);
5010 if (ret) {
5011 r600_ih_ring_fini(rdev);
5012 return ret;
5013 }
5014
5015 /* setup interrupt control */
5016 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5017 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5018 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5019 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5020 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5021 */
5022 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5023 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5024 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5025 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5026
5027 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5028 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5029
5030 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5031 IH_WPTR_OVERFLOW_CLEAR |
5032 (rb_bufsz << 1));
5033
5034 if (rdev->wb.enabled)
5035 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5036
5037 /* set the writeback address whether it's enabled or not */
5038 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5039 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5040
5041 WREG32(IH_RB_CNTL, ih_rb_cntl);
5042
5043 /* set rptr, wptr to 0 */
5044 WREG32(IH_RB_RPTR, 0);
5045 WREG32(IH_RB_WPTR, 0);
5046
5047 /* Default settings for IH_CNTL (disabled at first) */
5048 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5049 /* RPTR_REARM only works if msi's are enabled */
5050 if (rdev->msi_enabled)
5051 ih_cntl |= RPTR_REARM;
5052 WREG32(IH_CNTL, ih_cntl);
5053
5054 /* force the active interrupt state to all disabled */
5055 cik_disable_interrupt_state(rdev);
5056
5057 pci_set_master(rdev->pdev);
5058
5059 /* enable irqs */
5060 cik_enable_interrupts(rdev);
5061
5062 return ret;
5063}
5064
5065/**
5066 * cik_irq_set - enable/disable interrupt sources
5067 *
5068 * @rdev: radeon_device pointer
5069 *
5070 * Enable interrupt sources on the GPU (vblanks, hpd,
5071 * etc.) (CIK).
5072 * Returns 0 for success, errors for failure.
5073 */
5074int cik_irq_set(struct radeon_device *rdev)
5075{
5076 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5077 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04005078 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5079 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05005080 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5081 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5082 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04005083 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05005084
5085 if (!rdev->irq.installed) {
5086 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5087 return -EINVAL;
5088 }
5089 /* don't enable anything if the ih is disabled */
5090 if (!rdev->ih.enabled) {
5091 cik_disable_interrupts(rdev);
5092 /* force the active interrupt state to all disabled */
5093 cik_disable_interrupt_state(rdev);
5094 return 0;
5095 }
5096
5097 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5098 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5099 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5100 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5101 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5102 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5103
Alex Deucher21a93e12013-04-09 12:47:11 -04005104 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5105 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5106
Alex Deucher2b0781a2013-04-09 14:26:16 -04005107 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5108 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5109 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5110 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5111 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5112 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5113 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5114 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5115
Alex Deuchera59781b2012-11-09 10:45:57 -05005116 /* enable CP interrupts on all rings */
5117 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5118 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5119 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5120 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04005121 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5122 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5123 DRM_DEBUG("si_irq_set: sw int cp1\n");
5124 if (ring->me == 1) {
5125 switch (ring->pipe) {
5126 case 0:
5127 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5128 break;
5129 case 1:
5130 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5131 break;
5132 case 2:
5133 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5134 break;
5135 case 3:
5136 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5137 break;
5138 default:
5139 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5140 break;
5141 }
5142 } else if (ring->me == 2) {
5143 switch (ring->pipe) {
5144 case 0:
5145 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5146 break;
5147 case 1:
5148 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5149 break;
5150 case 2:
5151 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5152 break;
5153 case 3:
5154 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5155 break;
5156 default:
5157 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5158 break;
5159 }
5160 } else {
5161 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5162 }
5163 }
5164 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5165 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5166 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5167 if (ring->me == 1) {
5168 switch (ring->pipe) {
5169 case 0:
5170 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5171 break;
5172 case 1:
5173 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5174 break;
5175 case 2:
5176 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5177 break;
5178 case 3:
5179 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5180 break;
5181 default:
5182 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5183 break;
5184 }
5185 } else if (ring->me == 2) {
5186 switch (ring->pipe) {
5187 case 0:
5188 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5189 break;
5190 case 1:
5191 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5192 break;
5193 case 2:
5194 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5195 break;
5196 case 3:
5197 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5198 break;
5199 default:
5200 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5201 break;
5202 }
5203 } else {
5204 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5205 }
5206 }
Alex Deuchera59781b2012-11-09 10:45:57 -05005207
Alex Deucher21a93e12013-04-09 12:47:11 -04005208 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5209 DRM_DEBUG("cik_irq_set: sw int dma\n");
5210 dma_cntl |= TRAP_ENABLE;
5211 }
5212
5213 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5214 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5215 dma_cntl1 |= TRAP_ENABLE;
5216 }
5217
Alex Deuchera59781b2012-11-09 10:45:57 -05005218 if (rdev->irq.crtc_vblank_int[0] ||
5219 atomic_read(&rdev->irq.pflip[0])) {
5220 DRM_DEBUG("cik_irq_set: vblank 0\n");
5221 crtc1 |= VBLANK_INTERRUPT_MASK;
5222 }
5223 if (rdev->irq.crtc_vblank_int[1] ||
5224 atomic_read(&rdev->irq.pflip[1])) {
5225 DRM_DEBUG("cik_irq_set: vblank 1\n");
5226 crtc2 |= VBLANK_INTERRUPT_MASK;
5227 }
5228 if (rdev->irq.crtc_vblank_int[2] ||
5229 atomic_read(&rdev->irq.pflip[2])) {
5230 DRM_DEBUG("cik_irq_set: vblank 2\n");
5231 crtc3 |= VBLANK_INTERRUPT_MASK;
5232 }
5233 if (rdev->irq.crtc_vblank_int[3] ||
5234 atomic_read(&rdev->irq.pflip[3])) {
5235 DRM_DEBUG("cik_irq_set: vblank 3\n");
5236 crtc4 |= VBLANK_INTERRUPT_MASK;
5237 }
5238 if (rdev->irq.crtc_vblank_int[4] ||
5239 atomic_read(&rdev->irq.pflip[4])) {
5240 DRM_DEBUG("cik_irq_set: vblank 4\n");
5241 crtc5 |= VBLANK_INTERRUPT_MASK;
5242 }
5243 if (rdev->irq.crtc_vblank_int[5] ||
5244 atomic_read(&rdev->irq.pflip[5])) {
5245 DRM_DEBUG("cik_irq_set: vblank 5\n");
5246 crtc6 |= VBLANK_INTERRUPT_MASK;
5247 }
5248 if (rdev->irq.hpd[0]) {
5249 DRM_DEBUG("cik_irq_set: hpd 1\n");
5250 hpd1 |= DC_HPDx_INT_EN;
5251 }
5252 if (rdev->irq.hpd[1]) {
5253 DRM_DEBUG("cik_irq_set: hpd 2\n");
5254 hpd2 |= DC_HPDx_INT_EN;
5255 }
5256 if (rdev->irq.hpd[2]) {
5257 DRM_DEBUG("cik_irq_set: hpd 3\n");
5258 hpd3 |= DC_HPDx_INT_EN;
5259 }
5260 if (rdev->irq.hpd[3]) {
5261 DRM_DEBUG("cik_irq_set: hpd 4\n");
5262 hpd4 |= DC_HPDx_INT_EN;
5263 }
5264 if (rdev->irq.hpd[4]) {
5265 DRM_DEBUG("cik_irq_set: hpd 5\n");
5266 hpd5 |= DC_HPDx_INT_EN;
5267 }
5268 if (rdev->irq.hpd[5]) {
5269 DRM_DEBUG("cik_irq_set: hpd 6\n");
5270 hpd6 |= DC_HPDx_INT_EN;
5271 }
5272
5273 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5274
Alex Deucher21a93e12013-04-09 12:47:11 -04005275 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5276 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5277
Alex Deucher2b0781a2013-04-09 14:26:16 -04005278 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5279 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5280 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5281 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5282 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5283 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5284 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5285 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5286
Alex Deuchera59781b2012-11-09 10:45:57 -05005287 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5288
5289 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5290 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5291 if (rdev->num_crtc >= 4) {
5292 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5293 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5294 }
5295 if (rdev->num_crtc >= 6) {
5296 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5297 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5298 }
5299
5300 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5301 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5302 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5303 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5304 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5305 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5306
5307 return 0;
5308}
5309
5310/**
5311 * cik_irq_ack - ack interrupt sources
5312 *
5313 * @rdev: radeon_device pointer
5314 *
5315 * Ack interrupt sources on the GPU (vblanks, hpd,
5316 * etc.) (CIK). Certain interrupt sources are sw
5317 * generated and do not require an explicit ack.
5318 */
5319static inline void cik_irq_ack(struct radeon_device *rdev)
5320{
5321 u32 tmp;
5322
5323 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5324 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5325 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5326 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5327 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5328 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5329 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5330
5331 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5332 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5333 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5334 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5335 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5336 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5337 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5338 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5339
5340 if (rdev->num_crtc >= 4) {
5341 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5342 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5343 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5344 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5345 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5346 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5347 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5348 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5349 }
5350
5351 if (rdev->num_crtc >= 6) {
5352 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5353 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5354 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5355 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5356 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5357 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5358 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5359 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5360 }
5361
5362 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5363 tmp = RREG32(DC_HPD1_INT_CONTROL);
5364 tmp |= DC_HPDx_INT_ACK;
5365 WREG32(DC_HPD1_INT_CONTROL, tmp);
5366 }
5367 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5368 tmp = RREG32(DC_HPD2_INT_CONTROL);
5369 tmp |= DC_HPDx_INT_ACK;
5370 WREG32(DC_HPD2_INT_CONTROL, tmp);
5371 }
5372 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5373 tmp = RREG32(DC_HPD3_INT_CONTROL);
5374 tmp |= DC_HPDx_INT_ACK;
5375 WREG32(DC_HPD3_INT_CONTROL, tmp);
5376 }
5377 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5378 tmp = RREG32(DC_HPD4_INT_CONTROL);
5379 tmp |= DC_HPDx_INT_ACK;
5380 WREG32(DC_HPD4_INT_CONTROL, tmp);
5381 }
5382 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5383 tmp = RREG32(DC_HPD5_INT_CONTROL);
5384 tmp |= DC_HPDx_INT_ACK;
5385 WREG32(DC_HPD5_INT_CONTROL, tmp);
5386 }
5387 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5388 tmp = RREG32(DC_HPD6_INT_CONTROL);
5389 tmp |= DC_HPDx_INT_ACK;
5390 WREG32(DC_HPD6_INT_CONTROL, tmp);
5391 }
5392}
5393
5394/**
5395 * cik_irq_disable - disable interrupts
5396 *
5397 * @rdev: radeon_device pointer
5398 *
5399 * Disable interrupts on the hw (CIK).
5400 */
5401static void cik_irq_disable(struct radeon_device *rdev)
5402{
5403 cik_disable_interrupts(rdev);
5404 /* Wait and acknowledge irq */
5405 mdelay(1);
5406 cik_irq_ack(rdev);
5407 cik_disable_interrupt_state(rdev);
5408}
5409
5410/**
5411 * cik_irq_suspend - disable interrupts for suspend
5412 *
5413 * @rdev: radeon_device pointer
5414 *
5415 * Disable interrupts and stop the RLC (CIK).
5416 * Used for suspend.
5417 */
5418static void cik_irq_suspend(struct radeon_device *rdev)
5419{
5420 cik_irq_disable(rdev);
5421 cik_rlc_stop(rdev);
5422}
5423
5424/**
5425 * cik_irq_fini - tear down interrupt support
5426 *
5427 * @rdev: radeon_device pointer
5428 *
5429 * Disable interrupts on the hw and free the IH ring
5430 * buffer (CIK).
5431 * Used for driver unload.
5432 */
5433static void cik_irq_fini(struct radeon_device *rdev)
5434{
5435 cik_irq_suspend(rdev);
5436 r600_ih_ring_fini(rdev);
5437}
5438
5439/**
5440 * cik_get_ih_wptr - get the IH ring buffer wptr
5441 *
5442 * @rdev: radeon_device pointer
5443 *
5444 * Get the IH ring buffer wptr from either the register
5445 * or the writeback memory buffer (CIK). Also check for
5446 * ring buffer overflow and deal with it.
5447 * Used by cik_irq_process().
5448 * Returns the value of the wptr.
5449 */
5450static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5451{
5452 u32 wptr, tmp;
5453
5454 if (rdev->wb.enabled)
5455 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5456 else
5457 wptr = RREG32(IH_RB_WPTR);
5458
5459 if (wptr & RB_OVERFLOW) {
5460 /* When a ring buffer overflow happens, start parsing interrupts
5461 * from the last not-overwritten vector (wptr + 16). Hopefully
5462 * this should allow us to catch up.
5463 */
5464 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5465 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5466 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5467 tmp = RREG32(IH_RB_CNTL);
5468 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5469 WREG32(IH_RB_CNTL, tmp);
5470 }
5471 return (wptr & rdev->ih.ptr_mask);
5472}
5473
5474/* CIK IV Ring
5475 * Each IV ring entry is 128 bits:
5476 * [7:0] - interrupt source id
5477 * [31:8] - reserved
5478 * [59:32] - interrupt source data
5479 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04005480 * [71:64] - RINGID
5481 * CP:
5482 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05005483 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5484 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5485 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5486 * PIPE_ID - ME0 0=3D
5487 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04005488 * SDMA:
5489 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5490 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5491 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05005492 * [79:72] - VMID
5493 * [95:80] - PASID
5494 * [127:96] - reserved
5495 */
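
/* Illustrative sketch only (not part of the original driver): how a single
 * 4-dword IV ring entry could be unpacked according to the layout above.
 * The struct and helper names below are hypothetical; the interrupt handler
 * further down open-codes the same masks for src_id, src_data and ring_id.
 */
struct cik_iv_entry_example {
	u8 src_id;	/* [7:0] */
	u32 src_data;	/* [59:32] */
	u8 ring_id;	/* [71:64] */
	u8 vmid;	/* [79:72] */
	u16 pasid;	/* [95:80] */
};

static inline void cik_iv_decode_example(const u32 *dw,
					 struct cik_iv_entry_example *e)
{
	e->src_id = le32_to_cpu(dw[0]) & 0xff;
	e->src_data = le32_to_cpu(dw[1]) & 0xfffffff;
	e->ring_id = le32_to_cpu(dw[2]) & 0xff;
	e->vmid = (le32_to_cpu(dw[2]) >> 8) & 0xff;
	e->pasid = (le32_to_cpu(dw[2]) >> 16) & 0xffff;
}
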
5496/**
5497 * cik_irq_process - interrupt handler
5498 *
5499 * @rdev: radeon_device pointer
5500 *
5501 * Interrupt handler (CIK). Walk the IH ring,
5502 * ack interrupts and schedule work to handle
5503 * interrupt events.
5504 * Returns irq process return code.
5505 */
5506int cik_irq_process(struct radeon_device *rdev)
5507{
Alex Deucher2b0781a2013-04-09 14:26:16 -04005508 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5509 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05005510 u32 wptr;
5511 u32 rptr;
5512 u32 src_id, src_data, ring_id;
5513 u8 me_id, pipe_id, queue_id;
5514 u32 ring_index;
5515 bool queue_hotplug = false;
5516 bool queue_reset = false;
Alex Deucher3ec7d112013-06-14 10:42:22 -04005517 u32 addr, status, mc_client;
Alex Deuchera59781b2012-11-09 10:45:57 -05005518
5519 if (!rdev->ih.enabled || rdev->shutdown)
5520 return IRQ_NONE;
5521
5522 wptr = cik_get_ih_wptr(rdev);
5523
5524restart_ih:
5525 /* is somebody else already processing irqs? */
5526 if (atomic_xchg(&rdev->ih.lock, 1))
5527 return IRQ_NONE;
5528
5529 rptr = rdev->ih.rptr;
5530 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5531
5532 /* Order reading of wptr vs. reading of IH ring data */
5533 rmb();
5534
5535 /* display interrupts */
5536 cik_irq_ack(rdev);
5537
5538 while (rptr != wptr) {
5539 /* wptr/rptr are in bytes! */
5540 ring_index = rptr / 4;
5541 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5542 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5543 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05005544
5545 switch (src_id) {
5546 case 1: /* D1 vblank/vline */
5547 switch (src_data) {
5548 case 0: /* D1 vblank */
5549 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5550 if (rdev->irq.crtc_vblank_int[0]) {
5551 drm_handle_vblank(rdev->ddev, 0);
5552 rdev->pm.vblank_sync = true;
5553 wake_up(&rdev->irq.vblank_queue);
5554 }
5555 if (atomic_read(&rdev->irq.pflip[0]))
5556 radeon_crtc_handle_flip(rdev, 0);
5557 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5558 DRM_DEBUG("IH: D1 vblank\n");
5559 }
5560 break;
5561 case 1: /* D1 vline */
5562 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5563 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5564 DRM_DEBUG("IH: D1 vline\n");
5565 }
5566 break;
5567 default:
5568 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5569 break;
5570 }
5571 break;
5572 case 2: /* D2 vblank/vline */
5573 switch (src_data) {
5574 case 0: /* D2 vblank */
5575 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5576 if (rdev->irq.crtc_vblank_int[1]) {
5577 drm_handle_vblank(rdev->ddev, 1);
5578 rdev->pm.vblank_sync = true;
5579 wake_up(&rdev->irq.vblank_queue);
5580 }
5581 if (atomic_read(&rdev->irq.pflip[1]))
5582 radeon_crtc_handle_flip(rdev, 1);
5583 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5584 DRM_DEBUG("IH: D2 vblank\n");
5585 }
5586 break;
5587 case 1: /* D2 vline */
5588 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5589 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5590 DRM_DEBUG("IH: D2 vline\n");
5591 }
5592 break;
5593 default:
5594 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5595 break;
5596 }
5597 break;
5598 case 3: /* D3 vblank/vline */
5599 switch (src_data) {
5600 case 0: /* D3 vblank */
5601 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5602 if (rdev->irq.crtc_vblank_int[2]) {
5603 drm_handle_vblank(rdev->ddev, 2);
5604 rdev->pm.vblank_sync = true;
5605 wake_up(&rdev->irq.vblank_queue);
5606 }
5607 if (atomic_read(&rdev->irq.pflip[2]))
5608 radeon_crtc_handle_flip(rdev, 2);
5609 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5610 DRM_DEBUG("IH: D3 vblank\n");
5611 }
5612 break;
5613 case 1: /* D3 vline */
5614 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5615 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5616 DRM_DEBUG("IH: D3 vline\n");
5617 }
5618 break;
5619 default:
5620 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5621 break;
5622 }
5623 break;
5624 case 4: /* D4 vblank/vline */
5625 switch (src_data) {
5626 case 0: /* D4 vblank */
5627 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5628 if (rdev->irq.crtc_vblank_int[3]) {
5629 drm_handle_vblank(rdev->ddev, 3);
5630 rdev->pm.vblank_sync = true;
5631 wake_up(&rdev->irq.vblank_queue);
5632 }
5633 if (atomic_read(&rdev->irq.pflip[3]))
5634 radeon_crtc_handle_flip(rdev, 3);
5635 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5636 DRM_DEBUG("IH: D4 vblank\n");
5637 }
5638 break;
5639 case 1: /* D4 vline */
5640 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5641 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5642 DRM_DEBUG("IH: D4 vline\n");
5643 }
5644 break;
5645 default:
5646 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5647 break;
5648 }
5649 break;
5650 case 5: /* D5 vblank/vline */
5651 switch (src_data) {
5652 case 0: /* D5 vblank */
5653 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5654 if (rdev->irq.crtc_vblank_int[4]) {
5655 drm_handle_vblank(rdev->ddev, 4);
5656 rdev->pm.vblank_sync = true;
5657 wake_up(&rdev->irq.vblank_queue);
5658 }
5659 if (atomic_read(&rdev->irq.pflip[4]))
5660 radeon_crtc_handle_flip(rdev, 4);
5661 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5662 DRM_DEBUG("IH: D5 vblank\n");
5663 }
5664 break;
5665 case 1: /* D5 vline */
5666 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5667 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5668 DRM_DEBUG("IH: D5 vline\n");
5669 }
5670 break;
5671 default:
5672 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5673 break;
5674 }
5675 break;
5676 case 6: /* D6 vblank/vline */
5677 switch (src_data) {
5678 case 0: /* D6 vblank */
5679 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5680 if (rdev->irq.crtc_vblank_int[5]) {
5681 drm_handle_vblank(rdev->ddev, 5);
5682 rdev->pm.vblank_sync = true;
5683 wake_up(&rdev->irq.vblank_queue);
5684 }
5685 if (atomic_read(&rdev->irq.pflip[5]))
5686 radeon_crtc_handle_flip(rdev, 5);
5687 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5688 DRM_DEBUG("IH: D6 vblank\n");
5689 }
5690 break;
5691 case 1: /* D6 vline */
5692 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5693 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5694 DRM_DEBUG("IH: D6 vline\n");
5695 }
5696 break;
5697 default:
5698 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5699 break;
5700 }
5701 break;
5702 case 42: /* HPD hotplug */
5703 switch (src_data) {
5704 case 0:
5705 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5706 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5707 queue_hotplug = true;
5708 DRM_DEBUG("IH: HPD1\n");
5709 }
5710 break;
5711 case 1:
5712 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5713 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5714 queue_hotplug = true;
5715 DRM_DEBUG("IH: HPD2\n");
5716 }
5717 break;
5718 case 2:
5719 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5720 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5721 queue_hotplug = true;
5722 DRM_DEBUG("IH: HPD3\n");
5723 }
5724 break;
5725 case 3:
5726 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5727 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5728 queue_hotplug = true;
5729 DRM_DEBUG("IH: HPD4\n");
5730 }
5731 break;
5732 case 4:
5733 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5734 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5735 queue_hotplug = true;
5736 DRM_DEBUG("IH: HPD5\n");
5737 }
5738 break;
5739 case 5:
5740 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5741 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5742 queue_hotplug = true;
5743 DRM_DEBUG("IH: HPD6\n");
5744 }
5745 break;
5746 default:
5747 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5748 break;
5749 }
5750 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04005751 case 146:
5752 case 147:
Alex Deucher3ec7d112013-06-14 10:42:22 -04005753 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5754 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5755 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
Alex Deucher9d97c992012-09-06 14:24:48 -04005756 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5757 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04005758 addr);
Alex Deucher9d97c992012-09-06 14:24:48 -04005759 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04005760 status);
5761 cik_vm_decode_fault(rdev, status, addr, mc_client);
Alex Deucher9d97c992012-09-06 14:24:48 -04005762 /* reset addr and status */
5763 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5764 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005765 case 176: /* GFX RB CP_INT */
5766 case 177: /* GFX IB CP_INT */
5767 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5768 break;
5769 case 181: /* CP EOP event */
5770 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005771 /* XXX check the bitfield order! */
5772 me_id = (ring_id & 0x60) >> 5;
5773 pipe_id = (ring_id & 0x18) >> 3;
5774 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005775 switch (me_id) {
5776 case 0:
5777 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5778 break;
5779 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05005780 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04005781 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5782 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5783 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5784 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05005785 break;
5786 }
5787 break;
5788 case 184: /* CP Privileged reg access */
5789 DRM_ERROR("Illegal register access in command stream\n");
5790 /* XXX check the bitfield order! */
5791 me_id = (ring_id & 0x60) >> 5;
5792 pipe_id = (ring_id & 0x18) >> 3;
5793 queue_id = (ring_id & 0x7) >> 0;
5794 switch (me_id) {
5795 case 0:
5796 /* This results in a full GPU reset, but all we need to do is soft
5797 * reset the CP for gfx
5798 */
5799 queue_reset = true;
5800 break;
5801 case 1:
5802 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005803 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005804 break;
5805 case 2:
5806 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005807 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005808 break;
5809 }
5810 break;
5811 case 185: /* CP Privileged inst */
5812 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005813 /* XXX check the bitfield order! */
5814 me_id = (ring_id & 0x60) >> 5;
5815 pipe_id = (ring_id & 0x18) >> 3;
5816 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005817 switch (me_id) {
5818 case 0:
5819 /* This results in a full GPU reset, but all we need to do is soft
5820 * reset the CP for gfx
5821 */
5822 queue_reset = true;
5823 break;
5824 case 1:
5825 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005826 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005827 break;
5828 case 2:
5829 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005830 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005831 break;
5832 }
5833 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04005834 case 224: /* SDMA trap event */
5835 /* XXX check the bitfield order! */
5836 me_id = (ring_id & 0x3) >> 0;
5837 queue_id = (ring_id & 0xc) >> 2;
5838 DRM_DEBUG("IH: SDMA trap\n");
5839 switch (me_id) {
5840 case 0:
5841 switch (queue_id) {
5842 case 0:
5843 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5844 break;
5845 case 1:
5846 /* XXX compute */
5847 break;
5848 case 2:
5849 /* XXX compute */
5850 break;
5851 }
5852 break;
5853 case 1:
5854 switch (queue_id) {
5855 case 0:
5856 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5857 break;
5858 case 1:
5859 /* XXX compute */
5860 break;
5861 case 2:
5862 /* XXX compute */
5863 break;
5864 }
5865 break;
5866 }
5867 break;
5868 case 241: /* SDMA Privileged inst */
5869 case 247: /* SDMA Privileged inst */
5870 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5871 /* XXX check the bitfield order! */
5872 me_id = (ring_id & 0x3) >> 0;
5873 queue_id = (ring_id & 0xc) >> 2;
5874 switch (me_id) {
5875 case 0:
5876 switch (queue_id) {
5877 case 0:
5878 queue_reset = true;
5879 break;
5880 case 1:
5881 /* XXX compute */
5882 queue_reset = true;
5883 break;
5884 case 2:
5885 /* XXX compute */
5886 queue_reset = true;
5887 break;
5888 }
5889 break;
5890 case 1:
5891 switch (queue_id) {
5892 case 0:
5893 queue_reset = true;
5894 break;
5895 case 1:
5896 /* XXX compute */
5897 queue_reset = true;
5898 break;
5899 case 2:
5900 /* XXX compute */
5901 queue_reset = true;
5902 break;
5903 }
5904 break;
5905 }
5906 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005907 case 233: /* GUI IDLE */
5908 DRM_DEBUG("IH: GUI idle\n");
5909 break;
5910 default:
5911 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5912 break;
5913 }
5914
5915 /* wptr/rptr are in bytes! */
5916 rptr += 16;
5917 rptr &= rdev->ih.ptr_mask;
5918 }
5919 if (queue_hotplug)
5920 schedule_work(&rdev->hotplug_work);
5921 if (queue_reset)
5922 schedule_work(&rdev->reset_work);
5923 rdev->ih.rptr = rptr;
5924 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5925 atomic_set(&rdev->ih.lock, 0);
5926
5927 /* make sure wptr hasn't changed while processing */
5928 wptr = cik_get_ih_wptr(rdev);
5929 if (wptr != rptr)
5930 goto restart_ih;
5931
5932 return IRQ_HANDLED;
5933}
Alex Deucher7bf94a22012-08-17 11:48:29 -04005934
5935/*
5936 * startup/shutdown callbacks
5937 */
5938/**
5939 * cik_startup - program the asic to a functional state
5940 *
5941 * @rdev: radeon_device pointer
5942 *
5943 * Programs the asic to a functional state (CIK).
5944 * Called by cik_init() and cik_resume().
5945 * Returns 0 for success, error for failure.
5946 */
5947static int cik_startup(struct radeon_device *rdev)
5948{
5949 struct radeon_ring *ring;
5950 int r;
5951
Alex Deucher6fab3feb2013-08-04 12:13:17 -04005952 cik_mc_program(rdev);
5953
Alex Deucher7bf94a22012-08-17 11:48:29 -04005954 if (rdev->flags & RADEON_IS_IGP) {
5955 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5956 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5957 r = cik_init_microcode(rdev);
5958 if (r) {
5959 DRM_ERROR("Failed to load firmware!\n");
5960 return r;
5961 }
5962 }
5963 } else {
5964 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5965 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5966 !rdev->mc_fw) {
5967 r = cik_init_microcode(rdev);
5968 if (r) {
5969 DRM_ERROR("Failed to load firmware!\n");
5970 return r;
5971 }
5972 }
5973
5974 r = ci_mc_load_microcode(rdev);
5975 if (r) {
5976 DRM_ERROR("Failed to load MC firmware!\n");
5977 return r;
5978 }
5979 }
5980
5981 r = r600_vram_scratch_init(rdev);
5982 if (r)
5983 return r;
5984
Alex Deucher7bf94a22012-08-17 11:48:29 -04005985 r = cik_pcie_gart_enable(rdev);
5986 if (r)
5987 return r;
5988 cik_gpu_init(rdev);
5989
5990 /* allocate rlc buffers */
5991 r = si_rlc_init(rdev);
5992 if (r) {
5993 DRM_ERROR("Failed to init rlc BOs!\n");
5994 return r;
5995 }
5996
5997 /* allocate wb buffer */
5998 r = radeon_wb_init(rdev);
5999 if (r)
6000 return r;
6001
Alex Deucher963e81f2013-06-26 17:37:11 -04006002 /* allocate mec buffers */
6003 r = cik_mec_init(rdev);
6004 if (r) {
6005 DRM_ERROR("Failed to init MEC BOs!\n");
6006 return r;
6007 }
6008
Alex Deucher7bf94a22012-08-17 11:48:29 -04006009 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6010 if (r) {
6011 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6012 return r;
6013 }
6014
Alex Deucher963e81f2013-06-26 17:37:11 -04006015 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6016 if (r) {
6017 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6018 return r;
6019 }
6020
6021 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6022 if (r) {
6023 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6024 return r;
6025 }
6026
Alex Deucher7bf94a22012-08-17 11:48:29 -04006027 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6028 if (r) {
6029 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6030 return r;
6031 }
6032
6033 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6034 if (r) {
6035 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6036 return r;
6037 }
6038
Christian König87167bb2013-04-09 13:39:21 -04006039 r = cik_uvd_resume(rdev);
6040 if (!r) {
6041 r = radeon_fence_driver_start_ring(rdev,
6042 R600_RING_TYPE_UVD_INDEX);
6043 if (r)
6044 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6045 }
6046 if (r)
6047 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6048
Alex Deucher7bf94a22012-08-17 11:48:29 -04006049 /* Enable IRQ */
6050 if (!rdev->irq.installed) {
6051 r = radeon_irq_kms_init(rdev);
6052 if (r)
6053 return r;
6054 }
6055
6056 r = cik_irq_init(rdev);
6057 if (r) {
6058 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6059 radeon_irq_kms_fini(rdev);
6060 return r;
6061 }
6062 cik_irq_set(rdev);
6063
6064 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6065 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6066 CP_RB0_RPTR, CP_RB0_WPTR,
6067 0, 0xfffff, RADEON_CP_PACKET2);
6068 if (r)
6069 return r;
6070
Alex Deucher963e81f2013-06-26 17:37:11 -04006071 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04006072 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006073 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6074 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6075 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006076 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006077 if (r)
6078 return r;
6079 ring->me = 1; /* first MEC */
6080 ring->pipe = 0; /* first pipe */
6081 ring->queue = 0; /* first queue */
6082 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6083
Alex Deucher2615b532013-06-03 11:21:58 -04006084 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006085 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6086 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6087 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006088 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006089 if (r)
6090 return r;
6091 /* dGPUs only have 1 MEC */
6092 ring->me = 1; /* first MEC */
6093 ring->pipe = 0; /* first pipe */
6094 ring->queue = 1; /* second queue */
6095 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6096
Alex Deucher7bf94a22012-08-17 11:48:29 -04006097 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6098 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6099 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6100 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6101 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6102 if (r)
6103 return r;
6104
6105 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6106 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6107 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6108 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6109 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6110 if (r)
6111 return r;
6112
6113 r = cik_cp_resume(rdev);
6114 if (r)
6115 return r;
6116
6117 r = cik_sdma_resume(rdev);
6118 if (r)
6119 return r;
6120
Christian König87167bb2013-04-09 13:39:21 -04006121 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6122 if (ring->ring_size) {
6123 r = radeon_ring_init(rdev, ring, ring->ring_size,
6124 R600_WB_UVD_RPTR_OFFSET,
6125 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6126 0, 0xfffff, RADEON_CP_PACKET2);
6127 if (!r)
6128 r = r600_uvd_init(rdev);
6129 if (r)
6130 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6131 }
6132
Alex Deucher7bf94a22012-08-17 11:48:29 -04006133 r = radeon_ib_pool_init(rdev);
6134 if (r) {
6135 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6136 return r;
6137 }
6138
6139 r = radeon_vm_manager_init(rdev);
6140 if (r) {
6141 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6142 return r;
6143 }
6144
6145 return 0;
6146}
6147
6148/**
6149 * cik_resume - resume the asic to a functional state
6150 *
6151 * @rdev: radeon_device pointer
6152 *
6153 * Programs the asic to a functional state (CIK).
6154 * Called at resume.
6155 * Returns 0 for success, error for failure.
6156 */
6157int cik_resume(struct radeon_device *rdev)
6158{
6159 int r;
6160
6161 /* post card */
6162 atom_asic_init(rdev->mode_info.atom_context);
6163
Alex Deucher0aafd312013-04-09 14:43:30 -04006164 /* init golden registers */
6165 cik_init_golden_registers(rdev);
6166
Alex Deucher7bf94a22012-08-17 11:48:29 -04006167 rdev->accel_working = true;
6168 r = cik_startup(rdev);
6169 if (r) {
6170 DRM_ERROR("cik startup failed on resume\n");
6171 rdev->accel_working = false;
6172 return r;
6173 }
6174
6175 return r;
6176
6177}
6178
6179/**
6180 * cik_suspend - suspend the asic
6181 *
6182 * @rdev: radeon_device pointer
6183 *
6184 * Bring the chip into a state suitable for suspend (CIK).
6185 * Called at suspend.
6186 * Returns 0 for success.
6187 */
6188int cik_suspend(struct radeon_device *rdev)
6189{
6190 radeon_vm_manager_fini(rdev);
6191 cik_cp_enable(rdev, false);
6192 cik_sdma_enable(rdev, false);
Christian König2858c002013-08-01 17:34:07 +02006193 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006194 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006195 cik_irq_suspend(rdev);
6196 radeon_wb_disable(rdev);
6197 cik_pcie_gart_disable(rdev);
6198 return 0;
6199}
6200
6201/* Plan is to move initialization into that function and use
6202 * helper functions so that radeon_device_init does pretty much
6203 * nothing more than call asic specific functions. This
6204 * should also allow us to remove a bunch of callback functions
6205 * like vram_info.
6206 */
6207/**
6208 * cik_init - asic specific driver and hw init
6209 *
6210 * @rdev: radeon_device pointer
6211 *
6212 * Setup asic specific driver variables and program the hw
6213 * to a functional state (CIK).
6214 * Called at driver startup.
6215 * Returns 0 for success, errors for failure.
6216 */
6217int cik_init(struct radeon_device *rdev)
6218{
6219 struct radeon_ring *ring;
6220 int r;
6221
6222 /* Read BIOS */
6223 if (!radeon_get_bios(rdev)) {
6224 if (ASIC_IS_AVIVO(rdev))
6225 return -EINVAL;
6226 }
6227 /* Must be an ATOMBIOS */
6228 if (!rdev->is_atom_bios) {
6229 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
6230 return -EINVAL;
6231 }
6232 r = radeon_atombios_init(rdev);
6233 if (r)
6234 return r;
6235
6236 /* Post card if necessary */
6237 if (!radeon_card_posted(rdev)) {
6238 if (!rdev->bios) {
6239 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6240 return -EINVAL;
6241 }
6242 DRM_INFO("GPU not posted. posting now...\n");
6243 atom_asic_init(rdev->mode_info.atom_context);
6244 }
Alex Deucher0aafd312013-04-09 14:43:30 -04006245 /* init golden registers */
6246 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006247 /* Initialize scratch registers */
6248 cik_scratch_init(rdev);
6249 /* Initialize surface registers */
6250 radeon_surface_init(rdev);
6251 /* Initialize clocks */
6252 radeon_get_clock_info(rdev->ddev);
6253
6254 /* Fence driver */
6255 r = radeon_fence_driver_init(rdev);
6256 if (r)
6257 return r;
6258
6259 /* initialize memory controller */
6260 r = cik_mc_init(rdev);
6261 if (r)
6262 return r;
6263 /* Memory manager */
6264 r = radeon_bo_init(rdev);
6265 if (r)
6266 return r;
6267
6268 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6269 ring->ring_obj = NULL;
6270 r600_ring_init(rdev, ring, 1024 * 1024);
6271
Alex Deucher963e81f2013-06-26 17:37:11 -04006272 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6273 ring->ring_obj = NULL;
6274 r600_ring_init(rdev, ring, 1024 * 1024);
6275 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6276 if (r)
6277 return r;
6278
6279 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6280 ring->ring_obj = NULL;
6281 r600_ring_init(rdev, ring, 1024 * 1024);
6282 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6283 if (r)
6284 return r;
6285
Alex Deucher7bf94a22012-08-17 11:48:29 -04006286 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6287 ring->ring_obj = NULL;
6288 r600_ring_init(rdev, ring, 256 * 1024);
6289
6290 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6291 ring->ring_obj = NULL;
6292 r600_ring_init(rdev, ring, 256 * 1024);
6293
Christian König87167bb2013-04-09 13:39:21 -04006294 r = radeon_uvd_init(rdev);
6295 if (!r) {
6296 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6297 ring->ring_obj = NULL;
6298 r600_ring_init(rdev, ring, 4096);
6299 }
6300
Alex Deucher7bf94a22012-08-17 11:48:29 -04006301 rdev->ih.ring_obj = NULL;
6302 r600_ih_ring_init(rdev, 64 * 1024);
6303
6304 r = r600_pcie_gart_init(rdev);
6305 if (r)
6306 return r;
6307
6308 rdev->accel_working = true;
6309 r = cik_startup(rdev);
6310 if (r) {
6311 dev_err(rdev->dev, "disabling GPU acceleration\n");
6312 cik_cp_fini(rdev);
6313 cik_sdma_fini(rdev);
6314 cik_irq_fini(rdev);
6315 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006316 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006317 radeon_wb_fini(rdev);
6318 radeon_ib_pool_fini(rdev);
6319 radeon_vm_manager_fini(rdev);
6320 radeon_irq_kms_fini(rdev);
6321 cik_pcie_gart_fini(rdev);
6322 rdev->accel_working = false;
6323 }
6324
6325 /* Don't start up if the MC ucode is missing.
6326 * The default clocks and voltages before the MC ucode
6327 * is loaded are not sufficient for advanced operations.
6328 */
6329 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6330 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6331 return -EINVAL;
6332 }
6333
6334 return 0;
6335}
6336
6337/**
6338 * cik_fini - asic specific driver and hw fini
6339 *
6340 * @rdev: radeon_device pointer
6341 *
6342 * Tear down the asic specific driver variables and program the hw
6343 * to an idle state (CIK).
6344 * Called at driver unload.
6345 */
6346void cik_fini(struct radeon_device *rdev)
6347{
6348 cik_cp_fini(rdev);
6349 cik_sdma_fini(rdev);
6350 cik_irq_fini(rdev);
6351 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006352 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006353 radeon_wb_fini(rdev);
6354 radeon_vm_manager_fini(rdev);
6355 radeon_ib_pool_fini(rdev);
6356 radeon_irq_kms_fini(rdev);
Christian König2858c002013-08-01 17:34:07 +02006357 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006358 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006359 cik_pcie_gart_fini(rdev);
6360 r600_vram_scratch_fini(rdev);
6361 radeon_gem_fini(rdev);
6362 radeon_fence_driver_fini(rdev);
6363 radeon_bo_fini(rdev);
6364 radeon_atombios_fini(rdev);
6365 kfree(rdev->bios);
6366 rdev->bios = NULL;
6367}
Alex Deuchercd84a272012-07-20 17:13:13 -04006368
6369/* display watermark setup */
6370/**
6371 * dce8_line_buffer_adjust - Set up the line buffer
6372 *
6373 * @rdev: radeon_device pointer
6374 * @radeon_crtc: the selected display controller
6375 * @mode: the current display mode on the selected display
6376 * controller
6377 *
6378 * Set up the line buffer allocation for
6379 * the selected display controller (CIK).
6380 * Returns the line buffer size in pixels.
6381 */
6382static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6383 struct radeon_crtc *radeon_crtc,
6384 struct drm_display_mode *mode)
6385{
6386 u32 tmp;
6387
6388 /*
6389 * Line Buffer Setup
6390 * There are 6 line buffers, one for each display controller.
6391 * There are 3 partitions per LB. Select the number of partitions
6392 * to enable based on the display width. For display widths larger
6393 * than 4096, you need to use 2 display controllers and combine
6394 * them using the stereo blender.
6395 */
6396 if (radeon_crtc->base.enabled && mode) {
6397 if (mode->crtc_hdisplay < 1920)
6398 tmp = 1;
6399 else if (mode->crtc_hdisplay < 2560)
6400 tmp = 2;
6401 else if (mode->crtc_hdisplay < 4096)
6402 tmp = 0;
6403 else {
6404 DRM_DEBUG_KMS("Mode too big for LB!\n");
6405 tmp = 0;
6406 }
6407 } else
6408 tmp = 1;
6409
6410 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6411 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6412
6413 if (radeon_crtc->base.enabled && mode) {
6414 switch (tmp) {
6415 case 0:
6416 default:
6417 return 4096 * 2;
6418 case 1:
6419 return 1920 * 2;
6420 case 2:
6421 return 2560 * 2;
6422 }
6423 }
6424
6425 /* controller not enabled, so no lb used */
6426 return 0;
6427}
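
/* Example (illustrative, not from the original source): a 1920x1080 mode
 * falls into the "< 2560" bucket above, so tmp = 2 is programmed via
 * LB_MEMORY_CONFIG(2) and the function reports 2560 * 2 pixels of line
 * buffer to the watermark code.
 */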
6428
6429/**
6430 * cik_get_number_of_dram_channels - get the number of dram channels
6431 *
6432 * @rdev: radeon_device pointer
6433 *
6434 * Look up the number of video ram channels (CIK).
6435 * Used for display watermark bandwidth calculations
6436 * Returns the number of dram channels
6437 */
6438static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6439{
6440 u32 tmp = RREG32(MC_SHARED_CHMAP);
6441
6442 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6443 case 0:
6444 default:
6445 return 1;
6446 case 1:
6447 return 2;
6448 case 2:
6449 return 4;
6450 case 3:
6451 return 8;
6452 case 4:
6453 return 3;
6454 case 5:
6455 return 6;
6456 case 6:
6457 return 10;
6458 case 7:
6459 return 12;
6460 case 8:
6461 return 16;
6462 }
6463}
6464
6465struct dce8_wm_params {
6466 u32 dram_channels; /* number of dram channels */
6467 u32 yclk; /* bandwidth per dram data pin in kHz */
6468 u32 sclk; /* engine clock in kHz */
6469 u32 disp_clk; /* display clock in kHz */
6470 u32 src_width; /* viewport width */
6471 u32 active_time; /* active display time in ns */
6472 u32 blank_time; /* blank time in ns */
6473 bool interlaced; /* mode is interlaced */
6474 fixed20_12 vsc; /* vertical scale ratio */
6475 u32 num_heads; /* number of active crtcs */
6476 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6477 u32 lb_size; /* line buffer allocated to pipe */
6478 u32 vtaps; /* vertical scaler taps */
6479};
6480
6481/**
6482 * dce8_dram_bandwidth - get the dram bandwidth
6483 *
6484 * @wm: watermark calculation data
6485 *
6486 * Calculate the raw dram bandwidth (CIK).
6487 * Used for display watermark bandwidth calculations
6488 * Returns the dram bandwidth in MBytes/s
6489 */
6490static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6491{
6492 /* Calculate raw DRAM Bandwidth */
6493 fixed20_12 dram_efficiency; /* 0.7 */
6494 fixed20_12 yclk, dram_channels, bandwidth;
6495 fixed20_12 a;
6496
6497 a.full = dfixed_const(1000);
6498 yclk.full = dfixed_const(wm->yclk);
6499 yclk.full = dfixed_div(yclk, a);
6500 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6501 a.full = dfixed_const(10);
6502 dram_efficiency.full = dfixed_const(7);
6503 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6504 bandwidth.full = dfixed_mul(dram_channels, yclk);
6505 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6506
6507 return dfixed_trunc(bandwidth);
6508}
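
/* Worked example (illustrative numbers, not from the original source):
 * a 2-channel board with an effective 1 GHz memory clock has
 * wm->yclk = 1000000 kHz, so the raw bandwidth above comes out to roughly
 * (1000000 / 1000) * (2 * 4) * 0.7 ~= 5600 MBytes/s.
 */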
6509
6510/**
6511 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6512 *
6513 * @wm: watermark calculation data
6514 *
6515 * Calculate the dram bandwidth used for display (CIK).
6516 * Used for display watermark bandwidth calculations
6517 * Returns the dram bandwidth for display in MBytes/s
6518 */
6519static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6520{
6521 /* Calculate DRAM Bandwidth and the part allocated to display. */
6522 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6523 fixed20_12 yclk, dram_channels, bandwidth;
6524 fixed20_12 a;
6525
6526 a.full = dfixed_const(1000);
6527 yclk.full = dfixed_const(wm->yclk);
6528 yclk.full = dfixed_div(yclk, a);
6529 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6530 a.full = dfixed_const(10);
6531 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6532 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6533 bandwidth.full = dfixed_mul(dram_channels, yclk);
6534 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6535
6536 return dfixed_trunc(bandwidth);
6537}
6538
6539/**
6540 * dce8_data_return_bandwidth - get the data return bandwidth
6541 *
6542 * @wm: watermark calculation data
6543 *
6544 * Calculate the data return bandwidth used for display (CIK).
6545 * Used for display watermark bandwidth calculations
6546 * Returns the data return bandwidth in MBytes/s
6547 */
6548static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6549{
6550 /* Calculate the display Data return Bandwidth */
6551 fixed20_12 return_efficiency; /* 0.8 */
6552 fixed20_12 sclk, bandwidth;
6553 fixed20_12 a;
6554
6555 a.full = dfixed_const(1000);
6556 sclk.full = dfixed_const(wm->sclk);
6557 sclk.full = dfixed_div(sclk, a);
6558 a.full = dfixed_const(10);
6559 return_efficiency.full = dfixed_const(8);
6560 return_efficiency.full = dfixed_div(return_efficiency, a);
6561 a.full = dfixed_const(32);
6562 bandwidth.full = dfixed_mul(a, sclk);
6563 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6564
6565 return dfixed_trunc(bandwidth);
6566}
6567
6568/**
6569 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6570 *
6571 * @wm: watermark calculation data
6572 *
6573 * Calculate the dmif bandwidth used for display (CIK).
6574 * Used for display watermark bandwidth calculations
6575 * Returns the dmif bandwidth in MBytes/s
6576 */
6577static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6578{
6579 /* Calculate the DMIF Request Bandwidth */
6580 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6581 fixed20_12 disp_clk, bandwidth;
6582 fixed20_12 a, b;
6583
6584 a.full = dfixed_const(1000);
6585 disp_clk.full = dfixed_const(wm->disp_clk);
6586 disp_clk.full = dfixed_div(disp_clk, a);
6587 a.full = dfixed_const(32);
6588 b.full = dfixed_mul(a, disp_clk);
6589
6590 a.full = dfixed_const(10);
6591 disp_clk_request_efficiency.full = dfixed_const(8);
6592 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6593
6594 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6595
6596 return dfixed_trunc(bandwidth);
6597}
6598
6599/**
6600 * dce8_available_bandwidth - get the min available bandwidth
6601 *
6602 * @wm: watermark calculation data
6603 *
6604 * Calculate the min available bandwidth used for display (CIK).
6605 * Used for display watermark bandwidth calculations
6606 * Returns the min available bandwidth in MBytes/s
6607 */
6608static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6609{
6610 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6611 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6612 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6613 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6614
6615 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6616}
6617
6618/**
6619 * dce8_average_bandwidth - get the average available bandwidth
6620 *
6621 * @wm: watermark calculation data
6622 *
6623 * Calculate the average available bandwidth used for display (CIK).
6624 * Used for display watermark bandwidth calculations
6625 * Returns the average available bandwidth in MBytes/s
6626 */
6627static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6628{
6629 /* Calculate the display mode Average Bandwidth
6630 * DisplayMode should contain the source and destination dimensions,
6631 * timing, etc.
6632 */
6633 fixed20_12 bpp;
6634 fixed20_12 line_time;
6635 fixed20_12 src_width;
6636 fixed20_12 bandwidth;
6637 fixed20_12 a;
6638
6639 a.full = dfixed_const(1000);
6640 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6641 line_time.full = dfixed_div(line_time, a);
6642 bpp.full = dfixed_const(wm->bytes_per_pixel);
6643 src_width.full = dfixed_const(wm->src_width);
6644 bandwidth.full = dfixed_mul(src_width, bpp);
6645 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6646 bandwidth.full = dfixed_div(bandwidth, line_time);
6647
6648 return dfixed_trunc(bandwidth);
6649}
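
/* Worked example (illustrative numbers, not from the original source):
 * a 1920-wide source at 4 bytes per pixel, vsc = 1 and a ~14.8 us line
 * time (1080p at ~148.5 MHz pixel clock) averages roughly
 * 1920 * 4 / 14.8 ~= 519 MBytes/s.
 */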
6650
6651/**
6652 * dce8_latency_watermark - get the latency watermark
6653 *
6654 * @wm: watermark calculation data
6655 *
6656 * Calculate the latency watermark (CIK).
6657 * Used for display watermark bandwidth calculations
6658 * Returns the latency watermark in ns
6659 */
6660static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6661{
6662 /* First calculate the latency in ns */
6663 u32 mc_latency = 2000; /* 2000 ns. */
6664 u32 available_bandwidth = dce8_available_bandwidth(wm);
6665 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6666 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6667 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6668 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6669 (wm->num_heads * cursor_line_pair_return_time);
6670 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6671 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6672 u32 tmp, dmif_size = 12288;
6673 fixed20_12 a, b, c;
6674
6675 if (wm->num_heads == 0)
6676 return 0;
6677
6678 a.full = dfixed_const(2);
6679 b.full = dfixed_const(1);
6680 if ((wm->vsc.full > a.full) ||
6681 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6682 (wm->vtaps >= 5) ||
6683 ((wm->vsc.full >= a.full) && wm->interlaced))
6684 max_src_lines_per_dst_line = 4;
6685 else
6686 max_src_lines_per_dst_line = 2;
6687
6688 a.full = dfixed_const(available_bandwidth);
6689 b.full = dfixed_const(wm->num_heads);
6690 a.full = dfixed_div(a, b);
6691
6692 b.full = dfixed_const(mc_latency + 512);
6693 c.full = dfixed_const(wm->disp_clk);
6694 b.full = dfixed_div(b, c);
6695
6696 c.full = dfixed_const(dmif_size);
6697 b.full = dfixed_div(c, b);
6698
6699 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6700
6701 b.full = dfixed_const(1000);
6702 c.full = dfixed_const(wm->disp_clk);
6703 b.full = dfixed_div(c, b);
6704 c.full = dfixed_const(wm->bytes_per_pixel);
6705 b.full = dfixed_mul(b, c);
6706
6707 lb_fill_bw = min(tmp, dfixed_trunc(b));
6708
6709 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6710 b.full = dfixed_const(1000);
6711 c.full = dfixed_const(lb_fill_bw);
6712 b.full = dfixed_div(c, b);
6713 a.full = dfixed_div(a, b);
6714 line_fill_time = dfixed_trunc(a);
6715
6716 if (line_fill_time < wm->active_time)
6717 return latency;
6718 else
6719 return latency + (line_fill_time - wm->active_time);
6720
6721}
6722
6723/**
6724 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6725 * average and available dram bandwidth
6726 *
6727 * @wm: watermark calculation data
6728 *
6729 * Check if the display average bandwidth fits in the display
6730 * dram bandwidth (CIK).
6731 * Used for display watermark bandwidth calculations
6732 * Returns true if the display fits, false if not.
6733 */
6734static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6735{
6736 if (dce8_average_bandwidth(wm) <=
6737 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6738 return true;
6739 else
6740 return false;
6741}
6742
6743/**
6744 * dce8_average_bandwidth_vs_available_bandwidth - check
6745 * average and available bandwidth
6746 *
6747 * @wm: watermark calculation data
6748 *
6749 * Check if the display average bandwidth fits in the display
6750 * available bandwidth (CIK).
6751 * Used for display watermark bandwidth calculations
6752 * Returns true if the display fits, false if not.
6753 */
6754static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6755{
6756 if (dce8_average_bandwidth(wm) <=
6757 (dce8_available_bandwidth(wm) / wm->num_heads))
6758 return true;
6759 else
6760 return false;
6761}
6762
6763/**
6764 * dce8_check_latency_hiding - check latency hiding
6765 *
6766 * @wm: watermark calculation data
6767 *
6768 * Check latency hiding (CIK).
6769 * Used for display watermark bandwidth calculations
6770 * Returns true if the display fits, false if not.
6771 */
6772static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6773{
6774 u32 lb_partitions = wm->lb_size / wm->src_width;
6775 u32 line_time = wm->active_time + wm->blank_time;
6776 u32 latency_tolerant_lines;
6777 u32 latency_hiding;
6778 fixed20_12 a;
6779
6780 a.full = dfixed_const(1);
6781 if (wm->vsc.full > a.full)
6782 latency_tolerant_lines = 1;
6783 else {
6784 if (lb_partitions <= (wm->vtaps + 1))
6785 latency_tolerant_lines = 1;
6786 else
6787 latency_tolerant_lines = 2;
6788 }
6789
6790 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6791
6792 if (dce8_latency_watermark(wm) <= latency_hiding)
6793 return true;
6794 else
6795 return false;
6796}
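
/*
 * Editor's illustrative sketch, not part of the driver: the latency that
 * can be hidden is the time the latency tolerant lines spend in the line
 * buffer plus the blanking interval.  With a hypothetical line_time of
 * 20000, blank_time of 2000 and two tolerant lines, latency hiding is
 * 2 * 20000 + 2000 = 42000, so any latency watermark at or below that
 * value passes the check above.
 */
#if 0	/* illustrative only, kept out of the build */
static u32 dce8_latency_hiding_example(u32 tolerant_lines, u32 line_time,
				       u32 blank_time)
{
	/* window during which memory latency can be absorbed */
	return tolerant_lines * line_time + blank_time;
}
#endif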
6797
6798/**
6799 * dce8_program_watermarks - program display watermarks
6800 *
6801 * @rdev: radeon_device pointer
6802 * @radeon_crtc: the selected display controller
6803 * @lb_size: line buffer size
6804 * @num_heads: number of display controllers in use
6805 *
6806 * Calculate and program the display watermarks for the
6807 * selected display controller (CIK).
6808 */
6809static void dce8_program_watermarks(struct radeon_device *rdev,
6810 struct radeon_crtc *radeon_crtc,
6811 u32 lb_size, u32 num_heads)
6812{
6813 struct drm_display_mode *mode = &radeon_crtc->base.mode;
Alex Deucher58ea2de2013-01-24 10:03:39 -05006814 struct dce8_wm_params wm_low, wm_high;
Alex Deuchercd84a272012-07-20 17:13:13 -04006815 u32 pixel_period;
6816 u32 line_time = 0;
6817 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6818 u32 tmp, wm_mask;
6819
6820 if (radeon_crtc->base.enabled && num_heads && mode) {
6821 pixel_period = 1000000 / (u32)mode->clock;
6822 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6823
Alex Deucher58ea2de2013-01-24 10:03:39 -05006824 /* watermark for high clocks */
6825 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
6826 rdev->pm.dpm_enabled) {
6827 wm_high.yclk =
6828 radeon_dpm_get_mclk(rdev, false) * 10;
6829 wm_high.sclk =
6830 radeon_dpm_get_sclk(rdev, false) * 10;
6831 } else {
6832 wm_high.yclk = rdev->pm.current_mclk * 10;
6833 wm_high.sclk = rdev->pm.current_sclk * 10;
6834 }
6835
6836 wm_high.disp_clk = mode->clock;
6837 wm_high.src_width = mode->crtc_hdisplay;
6838 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
6839 wm_high.blank_time = line_time - wm_high.active_time;
6840 wm_high.interlaced = false;
Alex Deuchercd84a272012-07-20 17:13:13 -04006841 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
Alex Deucher58ea2de2013-01-24 10:03:39 -05006842 wm_high.interlaced = true;
6843 wm_high.vsc = radeon_crtc->vsc;
6844 wm_high.vtaps = 1;
Alex Deuchercd84a272012-07-20 17:13:13 -04006845 if (radeon_crtc->rmx_type != RMX_OFF)
Alex Deucher58ea2de2013-01-24 10:03:39 -05006846 wm_high.vtaps = 2;
6847 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
6848 wm_high.lb_size = lb_size;
6849 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
6850 wm_high.num_heads = num_heads;
Alex Deuchercd84a272012-07-20 17:13:13 -04006851
6852 /* set for high clocks */
Alex Deucher58ea2de2013-01-24 10:03:39 -05006853 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
Alex Deuchercd84a272012-07-20 17:13:13 -04006854
6855 /* possibly force display priority to high */
6856 /* should really do this at mode validation time... */
Alex Deucher58ea2de2013-01-24 10:03:39 -05006857 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
6858 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
6859 !dce8_check_latency_hiding(&wm_high) ||
6860 (rdev->disp_priority == 2)) {
6861 DRM_DEBUG_KMS("force priority to high\n");
6862 }
6863
6864 /* watermark for low clocks */
6865 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
6866 rdev->pm.dpm_enabled) {
6867 wm_low.yclk =
6868 radeon_dpm_get_mclk(rdev, true) * 10;
6869 wm_low.sclk =
6870 radeon_dpm_get_sclk(rdev, true) * 10;
6871 } else {
6872 wm_low.yclk = rdev->pm.current_mclk * 10;
6873 wm_low.sclk = rdev->pm.current_sclk * 10;
6874 }
6875
6876 wm_low.disp_clk = mode->clock;
6877 wm_low.src_width = mode->crtc_hdisplay;
6878 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
6879 wm_low.blank_time = line_time - wm_low.active_time;
6880 wm_low.interlaced = false;
6881 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6882 wm_low.interlaced = true;
6883 wm_low.vsc = radeon_crtc->vsc;
6884 wm_low.vtaps = 1;
6885 if (radeon_crtc->rmx_type != RMX_OFF)
6886 wm_low.vtaps = 2;
6887 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
6888 wm_low.lb_size = lb_size;
6889 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
6890 wm_low.num_heads = num_heads;
6891
6892 /* set for low clocks */
6893 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
6894
6895 /* possibly force display priority to high */
6896 /* should really do this at mode validation time... */
6897 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
6898 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
6899 !dce8_check_latency_hiding(&wm_low) ||
Alex Deuchercd84a272012-07-20 17:13:13 -04006900 (rdev->disp_priority == 2)) {
6901 DRM_DEBUG_KMS("force priority to high\n");
6902 }
6903 }
6904
6905 /* select wm A */
6906 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6907 tmp = wm_mask;
6908 tmp &= ~LATENCY_WATERMARK_MASK(3);
6909 tmp |= LATENCY_WATERMARK_MASK(1);
6910 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6911 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6912 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6913 LATENCY_HIGH_WATERMARK(line_time)));
6914 /* select wm B */
6915 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6916 tmp &= ~LATENCY_WATERMARK_MASK(3);
6917 tmp |= LATENCY_WATERMARK_MASK(2);
6918 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6919 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6920 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6921 LATENCY_HIGH_WATERMARK(line_time)));
6922 /* restore original selection */
6923 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
Alex Deucher58ea2de2013-01-24 10:03:39 -05006924
6925 /* save values for DPM */
6926 radeon_crtc->line_time = line_time;
6927 radeon_crtc->wm_high = latency_watermark_a;
6928 radeon_crtc->wm_low = latency_watermark_b;
Alex Deuchercd84a272012-07-20 17:13:13 -04006929}
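
/*
 * Editor's illustrative sketch, not part of the driver: the register writes
 * above follow a select/program/restore pattern -- point the watermark mask
 * at set A, write the A values, repeat for set B, then restore the original
 * mask so the hardware keeps using whichever set DPM had selected.  The
 * helper below is hypothetical and only restates that pattern.
 */
#if 0	/* illustrative only, kept out of the build */
static void dce8_write_wm_set_example(struct radeon_device *rdev, u32 offset,
				      u32 set, u32 latency_wm, u32 line_time)
{
	u32 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + offset);

	tmp &= ~LATENCY_WATERMARK_MASK(3);	/* clear the set select */
	tmp |= LATENCY_WATERMARK_MASK(set);	/* 1 selects A, 2 selects B */
	WREG32(DPG_WATERMARK_MASK_CONTROL + offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + offset,
	       (LATENCY_LOW_WATERMARK(latency_wm) |
		LATENCY_HIGH_WATERMARK(line_time)));
}
#endif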
6930
6931/**
6932 * dce8_bandwidth_update - program display watermarks and line buffer allocation
6933 *
6934 * @rdev: radeon_device pointer
6935 *
6936 * Calculate and program the display watermarks and line
6937 * buffer allocation (CIK).
6938 */
6939void dce8_bandwidth_update(struct radeon_device *rdev)
6940{
6941 struct drm_display_mode *mode = NULL;
6942 u32 num_heads = 0, lb_size;
6943 int i;
6944
6945 radeon_update_display_priority(rdev);
6946
6947 for (i = 0; i < rdev->num_crtc; i++) {
6948 if (rdev->mode_info.crtcs[i]->base.enabled)
6949 num_heads++;
6950 }
6951 for (i = 0; i < rdev->num_crtc; i++) {
6952 mode = &rdev->mode_info.crtcs[i]->base.mode;
6953 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6954 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6955 }
6956}
Alex Deucher44fa3462012-12-18 22:17:00 -05006957
6958/**
6959 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6960 *
6961 * @rdev: radeon_device pointer
6962 *
6963 * Fetches a GPU clock counter snapshot (CIK).
6964 * Returns the 64-bit clock counter snapshot.
6965 */
6966uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6967{
6968 uint64_t clock;
6969
6970 mutex_lock(&rdev->gpu_clock_mutex);
6971 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6972 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6973 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6974 mutex_unlock(&rdev->gpu_clock_mutex);
6975 return clock;
6976}
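
/*
 * Editor's usage sketch, not part of the driver: the write to
 * RLC_CAPTURE_GPU_CLOCK_COUNT latches the full 64-bit counter so the two
 * 32-bit reads above are consistent with each other, and the mutex keeps a
 * second caller from re-latching in between.  A caller treats the result as
 * a plain monotonic timestamp; the helper name below is hypothetical.
 */
#if 0	/* illustrative only, kept out of the build */
static uint64_t cik_gpu_clock_delta_example(struct radeon_device *rdev)
{
	uint64_t start = cik_get_gpu_clock_counter(rdev);

	/* ... work being timed ... */

	return cik_get_gpu_clock_counter(rdev) - start;
}
#endif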
6977
Christian König87167bb2013-04-09 13:39:21 -04006978static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6979 u32 cntl_reg, u32 status_reg)
6980{
6981 int r, i;
6982 struct atom_clock_dividers dividers;
6983 uint32_t tmp;
6984
6985 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6986 clock, false, &dividers);
6987 if (r)
6988 return r;
6989
6990 tmp = RREG32_SMC(cntl_reg);
6991 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6992 tmp |= dividers.post_divider;
6993 WREG32_SMC(cntl_reg, tmp);
6994
6995 for (i = 0; i < 100; i++) {
6996 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6997 break;
6998 mdelay(10);
6999 }
7000 if (i == 100)
7001 return -ETIMEDOUT;
7002
7003 return 0;
7004}
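
/*
 * Editor's illustrative sketch, not part of the driver: the loop above is a
 * bounded poll -- up to 100 iterations of 10 ms each, roughly one second,
 * waiting for DCLK_STATUS before giving up with -ETIMEDOUT.  The same
 * pattern factored into a hypothetical helper:
 */
#if 0	/* illustrative only, kept out of the build */
static int cik_wait_smc_status_example(struct radeon_device *rdev,
				       u32 status_reg, u32 mask)
{
	int i;

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & mask)
			return 0;
		mdelay(10);
	}
	return -ETIMEDOUT;
}
#endif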
7005
7006int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7007{
7008 int r = 0;
7009
7010 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
7011 if (r)
7012 return r;
7013
7014 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
7015 return r;
7016}
7017
7018int cik_uvd_resume(struct radeon_device *rdev)
7019{
7020 uint64_t addr;
7021 uint32_t size;
7022 int r;
7023
7024 r = radeon_uvd_resume(rdev);
7025 if (r)
7026 return r;
7027
7028 /* program the VCPU memory controller bits 0-27 */
7029 addr = rdev->uvd.gpu_addr >> 3;
Christian König4ad9c1c2013-08-05 14:10:55 +02007030 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
Christian König87167bb2013-04-09 13:39:21 -04007031 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
7032 WREG32(UVD_VCPU_CACHE_SIZE0, size);
7033
7034 addr += size;
7035 size = RADEON_UVD_STACK_SIZE >> 3;
7036 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
7037 WREG32(UVD_VCPU_CACHE_SIZE1, size);
7038
7039 addr += size;
7040 size = RADEON_UVD_HEAP_SIZE >> 3;
7041 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
7042 WREG32(UVD_VCPU_CACHE_SIZE2, size);
7043
7044 /* bits 28-31 */
7045 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7046 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7047
7048 /* bits 32-39 */
7049 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7050 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7051
7052 return 0;
7053}
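
/*
 * Editor's note, an illustrative summary rather than driver code: the
 * offsets and sizes programmed above are expressed in 8-byte units (hence
 * the >> 3) and describe three regions packed back to back in the UVD
 * buffer object:
 *
 *   offset 0                    firmware image (size + 4, page aligned)
 *   + firmware size             stack (RADEON_UVD_STACK_SIZE)
 *   + firmware + stack sizes    heap  (RADEON_UVD_HEAP_SIZE)
 *
 * Address bits 28-31 and 32-39 are written separately to UVD_LMI_ADDR_EXT
 * and UVD_LMI_EXT40_ADDR.
 */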