/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
extern void si_rlc_reset(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

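/*
 * Per-asic "golden" register tables below are {offset, and_mask, or_mask}
 * triplets applied via radeon_program_register_sequence() from
 * cik_init_golden_registers().
 */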
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

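/*
 * bonaire_io_mc_regs holds {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs that ci_mc_load_microcode() programs before uploading the MC ucode.
 */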
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes. Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

916 if (num_pipe_configs == 8) {
917 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
918 switch (reg_offset) {
919 case 0:
920 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
921 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
922 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
923 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
924 break;
925 case 1:
926 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
927 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
928 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
929 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
930 break;
931 case 2:
932 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
936 break;
937 case 3:
938 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
939 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
940 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
941 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
942 break;
943 case 4:
944 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
945 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
947 TILE_SPLIT(split_equal_to_row_size));
948 break;
949 case 5:
950 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
951 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
952 break;
953 case 6:
954 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
955 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
956 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
957 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
958 break;
959 case 7:
960 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
961 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
962 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
963 TILE_SPLIT(split_equal_to_row_size));
964 break;
965 case 8:
966 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
967 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
968 break;
969 case 9:
970 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
971 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
972 break;
973 case 10:
974 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
975 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
976 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
977 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
978 break;
979 case 11:
980 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
981 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
982 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
983 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
984 break;
985 case 12:
986 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
987 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
990 break;
991 case 13:
992 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
993 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
994 break;
995 case 14:
996 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
997 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
998 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
999 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1000 break;
1001 case 16:
1002 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1003 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1004 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1005 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1006 break;
1007 case 17:
1008 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1009 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1012 break;
1013 case 27:
1014 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1015 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1016 break;
1017 case 28:
1018 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1019 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1021 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1022 break;
1023 case 29:
1024 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1025 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1026 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1027 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1028 break;
1029 case 30:
1030 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1031 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1033 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1034 break;
1035 default:
1036 gb_tile_moden = 0;
1037 break;
1038 }
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1041 }
1042 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1043 switch (reg_offset) {
1044 case 0:
1045 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1046 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1047 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1048 NUM_BANKS(ADDR_SURF_16_BANK));
1049 break;
1050 case 1:
1051 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1052 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1053 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1054 NUM_BANKS(ADDR_SURF_16_BANK));
1055 break;
1056 case 2:
1057 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1058 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1059 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1060 NUM_BANKS(ADDR_SURF_16_BANK));
1061 break;
1062 case 3:
1063 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1066 NUM_BANKS(ADDR_SURF_16_BANK));
1067 break;
1068 case 4:
1069 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1070 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1071 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1072 NUM_BANKS(ADDR_SURF_8_BANK));
1073 break;
1074 case 5:
1075 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1076 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1077 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1078 NUM_BANKS(ADDR_SURF_4_BANK));
1079 break;
1080 case 6:
1081 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1084 NUM_BANKS(ADDR_SURF_2_BANK));
1085 break;
1086 case 8:
1087 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1088 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1089 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1090 NUM_BANKS(ADDR_SURF_16_BANK));
1091 break;
1092 case 9:
1093 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1096 NUM_BANKS(ADDR_SURF_16_BANK));
1097 break;
1098 case 10:
1099 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1100 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1101 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1102 NUM_BANKS(ADDR_SURF_16_BANK));
1103 break;
1104 case 11:
1105 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1106 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1107 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1108 NUM_BANKS(ADDR_SURF_16_BANK));
1109 break;
1110 case 12:
1111 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1112 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1113 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1114 NUM_BANKS(ADDR_SURF_8_BANK));
1115 break;
1116 case 13:
1117 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1120 NUM_BANKS(ADDR_SURF_4_BANK));
1121 break;
1122 case 14:
1123 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1126 NUM_BANKS(ADDR_SURF_2_BANK));
1127 break;
1128 default:
1129 gb_tile_moden = 0;
1130 break;
1131 }
1132 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1133 }
1134 } else if (num_pipe_configs == 4) {
1135 if (num_rbs == 4) {
1136 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1137 switch (reg_offset) {
1138 case 0:
1139 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1140 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1141 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1142 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1143 break;
1144 case 1:
1145 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1146 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1147 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1148 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1149 break;
1150 case 2:
1151 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1153 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1155 break;
1156 case 3:
1157 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1159 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1160 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1161 break;
1162 case 4:
1163 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1165 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1166 TILE_SPLIT(split_equal_to_row_size));
1167 break;
1168 case 5:
1169 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1170 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1171 break;
1172 case 6:
1173 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1174 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1175 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1176 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1177 break;
1178 case 7:
1179 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1181 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1182 TILE_SPLIT(split_equal_to_row_size));
1183 break;
1184 case 8:
1185 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1186 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1187 break;
1188 case 9:
1189 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1190 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1191 break;
1192 case 10:
1193 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1194 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1195 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1197 break;
1198 case 11:
1199 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1200 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1201 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1203 break;
1204 case 12:
1205 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1206 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1207 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1209 break;
1210 case 13:
1211 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1212 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1213 break;
1214 case 14:
1215 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1216 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1217 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1219 break;
1220 case 16:
1221 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1222 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1223 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1225 break;
1226 case 17:
1227 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1228 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1229 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1231 break;
1232 case 27:
1233 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1234 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1235 break;
1236 case 28:
1237 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1238 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1239 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1241 break;
1242 case 29:
1243 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1244 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1245 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1247 break;
1248 case 30:
1249 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1250 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1253 break;
1254 default:
1255 gb_tile_moden = 0;
1256 break;
1257 }
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1260 }
1261 } else if (num_rbs < 4) {
1262 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1263 switch (reg_offset) {
1264 case 0:
1265 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1266 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1267 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1268 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1269 break;
1270 case 1:
1271 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1272 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1273 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1274 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1275 break;
1276 case 2:
1277 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1279 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1281 break;
1282 case 3:
1283 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1285 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1286 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1287 break;
1288 case 4:
1289 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1291 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1292 TILE_SPLIT(split_equal_to_row_size));
1293 break;
1294 case 5:
1295 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1297 break;
1298 case 6:
1299 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1300 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1301 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1302 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1303 break;
1304 case 7:
1305 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1306 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1307 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1308 TILE_SPLIT(split_equal_to_row_size));
1309 break;
1310 case 8:
1311 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1312 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1313 break;
1314 case 9:
1315 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1316 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1317 break;
1318 case 10:
1319 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1320 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1321 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1323 break;
1324 case 11:
1325 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1326 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1327 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1329 break;
1330 case 12:
1331 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1332 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1333 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1335 break;
1336 case 13:
1337 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1339 break;
1340 case 14:
1341 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1342 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1343 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1345 break;
1346 case 16:
1347 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1348 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1349 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1351 break;
1352 case 17:
1353 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1355 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1357 break;
1358 case 27:
1359 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1360 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1361 break;
1362 case 28:
1363 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1364 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1365 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1367 break;
1368 case 29:
1369 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1370 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1371 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1373 break;
1374 case 30:
1375 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1376 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1377 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1379 break;
1380 default:
1381 gb_tile_moden = 0;
1382 break;
1383 }
				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1386 }
1387 }
1388 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1389 switch (reg_offset) {
1390 case 0:
1391 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1394 NUM_BANKS(ADDR_SURF_16_BANK));
1395 break;
1396 case 1:
1397 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1398 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1399 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1400 NUM_BANKS(ADDR_SURF_16_BANK));
1401 break;
1402 case 2:
1403 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1406 NUM_BANKS(ADDR_SURF_16_BANK));
1407 break;
1408 case 3:
1409 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1412 NUM_BANKS(ADDR_SURF_16_BANK));
1413 break;
1414 case 4:
1415 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1418 NUM_BANKS(ADDR_SURF_16_BANK));
1419 break;
1420 case 5:
1421 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1424 NUM_BANKS(ADDR_SURF_8_BANK));
1425 break;
1426 case 6:
1427 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1430 NUM_BANKS(ADDR_SURF_4_BANK));
1431 break;
1432 case 8:
1433 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1436 NUM_BANKS(ADDR_SURF_16_BANK));
1437 break;
1438 case 9:
1439 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1440 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1441 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1442 NUM_BANKS(ADDR_SURF_16_BANK));
1443 break;
1444 case 10:
1445 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1448 NUM_BANKS(ADDR_SURF_16_BANK));
1449 break;
1450 case 11:
1451 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1454 NUM_BANKS(ADDR_SURF_16_BANK));
1455 break;
1456 case 12:
1457 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1460 NUM_BANKS(ADDR_SURF_16_BANK));
1461 break;
1462 case 13:
1463 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1466 NUM_BANKS(ADDR_SURF_8_BANK));
1467 break;
1468 case 14:
1469 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1472 NUM_BANKS(ADDR_SURF_4_BANK));
1473 break;
1474 default:
1475 gb_tile_moden = 0;
1476 break;
1477 }
1478 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1479 }
1480 } else if (num_pipe_configs == 2) {
1481 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1482 switch (reg_offset) {
1483 case 0:
1484 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1485 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1486 PIPE_CONFIG(ADDR_SURF_P2) |
1487 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1488 break;
1489 case 1:
1490 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1491 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1492 PIPE_CONFIG(ADDR_SURF_P2) |
1493 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1494 break;
1495 case 2:
1496 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1497 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1498 PIPE_CONFIG(ADDR_SURF_P2) |
1499 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1500 break;
1501 case 3:
1502 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1503 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1504 PIPE_CONFIG(ADDR_SURF_P2) |
1505 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1506 break;
1507 case 4:
1508 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1509 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1510 PIPE_CONFIG(ADDR_SURF_P2) |
1511 TILE_SPLIT(split_equal_to_row_size));
1512 break;
1513 case 5:
1514 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1516 break;
1517 case 6:
1518 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1519 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1520 PIPE_CONFIG(ADDR_SURF_P2) |
1521 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1522 break;
1523 case 7:
1524 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1525 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1526 PIPE_CONFIG(ADDR_SURF_P2) |
1527 TILE_SPLIT(split_equal_to_row_size));
1528 break;
1529 case 8:
1530 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1531 break;
1532 case 9:
1533 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1534 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1535 break;
1536 case 10:
1537 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1538 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1539 PIPE_CONFIG(ADDR_SURF_P2) |
1540 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1541 break;
1542 case 11:
1543 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1544 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1545 PIPE_CONFIG(ADDR_SURF_P2) |
1546 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1547 break;
1548 case 12:
1549 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1550 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1551 PIPE_CONFIG(ADDR_SURF_P2) |
1552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1553 break;
1554 case 13:
1555 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1556 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1557 break;
1558 case 14:
1559 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1560 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1561 PIPE_CONFIG(ADDR_SURF_P2) |
1562 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1563 break;
1564 case 16:
1565 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1566 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1567 PIPE_CONFIG(ADDR_SURF_P2) |
1568 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1569 break;
1570 case 17:
1571 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1572 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1573 PIPE_CONFIG(ADDR_SURF_P2) |
1574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1575 break;
1576 case 27:
1577 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1578 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1579 break;
1580 case 28:
1581 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1582 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1583 PIPE_CONFIG(ADDR_SURF_P2) |
1584 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1585 break;
1586 case 29:
1587 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1588 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1589 PIPE_CONFIG(ADDR_SURF_P2) |
1590 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1591 break;
1592 case 30:
1593 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1594 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1595 PIPE_CONFIG(ADDR_SURF_P2) |
1596 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1597 break;
1598 default:
1599 gb_tile_moden = 0;
1600 break;
1601 }
1602			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1603			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1604 }
1605 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1606 switch (reg_offset) {
1607 case 0:
1608 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1609 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1610 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1611 NUM_BANKS(ADDR_SURF_16_BANK));
1612 break;
1613 case 1:
1614 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1615 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1616 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1617 NUM_BANKS(ADDR_SURF_16_BANK));
1618 break;
1619 case 2:
1620 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1621 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1622 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1623 NUM_BANKS(ADDR_SURF_16_BANK));
1624 break;
1625 case 3:
1626 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1627 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1628 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1629 NUM_BANKS(ADDR_SURF_16_BANK));
1630 break;
1631 case 4:
1632 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1633 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1634 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1635 NUM_BANKS(ADDR_SURF_16_BANK));
1636 break;
1637 case 5:
1638 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1639 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1640 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1641 NUM_BANKS(ADDR_SURF_16_BANK));
1642 break;
1643 case 6:
1644 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1647 NUM_BANKS(ADDR_SURF_8_BANK));
1648 break;
1649 case 8:
1650 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1651 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1652 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1653 NUM_BANKS(ADDR_SURF_16_BANK));
1654 break;
1655 case 9:
1656 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1657 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1658 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1659 NUM_BANKS(ADDR_SURF_16_BANK));
1660 break;
1661 case 10:
1662 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1663 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1664 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1665 NUM_BANKS(ADDR_SURF_16_BANK));
1666 break;
1667 case 11:
1668 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1671 NUM_BANKS(ADDR_SURF_16_BANK));
1672 break;
1673 case 12:
1674 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1675 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1676 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1677 NUM_BANKS(ADDR_SURF_16_BANK));
1678 break;
1679 case 13:
1680 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1683 NUM_BANKS(ADDR_SURF_16_BANK));
1684 break;
1685 case 14:
1686 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1687 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1688 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1689 NUM_BANKS(ADDR_SURF_8_BANK));
1690 break;
1691 default:
1692 gb_tile_moden = 0;
1693 break;
1694 }
1695 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1696 }
1697 } else
1698 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1699}
1700
1701/**
1702 * cik_select_se_sh - select which SE, SH to address
1703 *
1704 * @rdev: radeon_device pointer
1705 * @se_num: shader engine to address
1706 * @sh_num: sh block to address
1707 *
1708 * Select which SE, SH combinations to address. Certain
1709 * registers are instanced per SE or SH. 0xffffffff means
1710 * broadcast to all SEs or SHs (CIK).
1711 */
1712static void cik_select_se_sh(struct radeon_device *rdev,
1713 u32 se_num, u32 sh_num)
1714{
1715 u32 data = INSTANCE_BROADCAST_WRITES;
1716
1717 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1718		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1719	else if (se_num == 0xffffffff)
1720 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1721 else if (sh_num == 0xffffffff)
1722 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1723 else
1724 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1725 WREG32(GRBM_GFX_INDEX, data);
1726}
1727
1728/**
1729 * cik_create_bitmask - create a bitmask
1730 *
1731 * @bit_width: length of the mask
1732 *
1733 * create a variable length bit mask (CIK).
1734 * Returns the bitmask.
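 * e.g. cik_create_bitmask(4) returns 0xf (the four low bits set).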
1735 */
1736static u32 cik_create_bitmask(u32 bit_width)
1737{
1738 u32 i, mask = 0;
1739
1740 for (i = 0; i < bit_width; i++) {
1741 mask <<= 1;
1742 mask |= 1;
1743 }
1744 return mask;
1745}
1746
1747/**
1748 * cik_get_rb_disabled - get the mask of disabled RBs
1749 *
1750 * @rdev: radeon_device pointer
1751 * @max_rb_num: max RBs (render backends) for the asic
1752 * @se_num: number of SEs (shader engines) for the asic
1753 * @sh_per_se: number of SH blocks per SE for the asic
1754 *
1755 * Calculates the bitmask of disabled RBs (CIK).
1756 * Returns the disabled RB bitmask.
1757 */
1758static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1759 u32 max_rb_num, u32 se_num,
1760 u32 sh_per_se)
1761{
1762 u32 data, mask;
1763
1764 data = RREG32(CC_RB_BACKEND_DISABLE);
1765 if (data & 1)
1766 data &= BACKEND_DISABLE_MASK;
1767 else
1768 data = 0;
1769 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1770
1771 data >>= BACKEND_DISABLE_SHIFT;
1772
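	/* one mask bit per RB in this SE/SH: total RBs / SEs / SHs per SE */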
1773 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1774
1775 return data & mask;
1776}
1777
1778/**
1779 * cik_setup_rb - setup the RBs on the asic
1780 *
1781 * @rdev: radeon_device pointer
1782 * @se_num: number of SEs (shader engines) for the asic
1783 * @sh_per_se: number of SH blocks per SE for the asic
1784 * @max_rb_num: max RBs (render backends) for the asic
1785 *
1786 * Configures per-SE/SH RB registers (CIK).
1787 */
1788static void cik_setup_rb(struct radeon_device *rdev,
1789 u32 se_num, u32 sh_per_se,
1790 u32 max_rb_num)
1791{
1792 int i, j;
1793 u32 data, mask;
1794 u32 disabled_rbs = 0;
1795 u32 enabled_rbs = 0;
1796
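	/* collect each SE/SH's disabled-RB bitmap into one packed mask,
	 * CIK_RB_BITMAP_WIDTH_PER_SH bits per SE/SH slot */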
1797 for (i = 0; i < se_num; i++) {
1798 for (j = 0; j < sh_per_se; j++) {
1799 cik_select_se_sh(rdev, i, j);
1800 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1801 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1802 }
1803 }
1804 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1805
1806 mask = 1;
1807 for (i = 0; i < max_rb_num; i++) {
1808 if (!(disabled_rbs & mask))
1809 enabled_rbs |= mask;
1810 mask <<= 1;
1811 }
1812
1813 for (i = 0; i < se_num; i++) {
1814 cik_select_se_sh(rdev, i, 0xffffffff);
1815 data = 0;
1816 for (j = 0; j < sh_per_se; j++) {
1817 switch (enabled_rbs & 3) {
1818 case 1:
1819 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1820 break;
1821 case 2:
1822 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1823 break;
1824 case 3:
1825 default:
1826 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1827 break;
1828 }
1829 enabled_rbs >>= 2;
1830 }
1831 WREG32(PA_SC_RASTER_CONFIG, data);
1832 }
1833 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1834}
1835
1836/**
1837 * cik_gpu_init - setup the 3D engine
1838 *
1839 * @rdev: radeon_device pointer
1840 *
1841 * Configures the 3D engine and tiling configuration
1842 * registers so that the 3D engine is usable.
1843 */
1844static void cik_gpu_init(struct radeon_device *rdev)
1845{
1846 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1847 u32 mc_shared_chmap, mc_arb_ramcfg;
1848 u32 hdp_host_path_cntl;
1849 u32 tmp;
1850 int i, j;
1851
1852 switch (rdev->family) {
1853 case CHIP_BONAIRE:
1854 rdev->config.cik.max_shader_engines = 2;
1855 rdev->config.cik.max_tile_pipes = 4;
1856 rdev->config.cik.max_cu_per_sh = 7;
1857 rdev->config.cik.max_sh_per_se = 1;
1858 rdev->config.cik.max_backends_per_se = 2;
1859 rdev->config.cik.max_texture_channel_caches = 4;
1860 rdev->config.cik.max_gprs = 256;
1861 rdev->config.cik.max_gs_threads = 32;
1862 rdev->config.cik.max_hw_contexts = 8;
1863
1864 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1865 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1866 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1867 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1868 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1869 break;
1870 case CHIP_KAVERI:
1871 /* TODO */
1872 break;
1873 case CHIP_KABINI:
1874 default:
1875 rdev->config.cik.max_shader_engines = 1;
1876 rdev->config.cik.max_tile_pipes = 2;
1877 rdev->config.cik.max_cu_per_sh = 2;
1878 rdev->config.cik.max_sh_per_se = 1;
1879 rdev->config.cik.max_backends_per_se = 1;
1880 rdev->config.cik.max_texture_channel_caches = 2;
1881 rdev->config.cik.max_gprs = 256;
1882 rdev->config.cik.max_gs_threads = 16;
1883 rdev->config.cik.max_hw_contexts = 8;
1884
1885 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1886 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1887 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1888 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1889 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1890 break;
1891 }
1892
1893 /* Initialize HDP */
1894 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1895 WREG32((0x2c14 + j), 0x00000000);
1896 WREG32((0x2c18 + j), 0x00000000);
1897 WREG32((0x2c1c + j), 0x00000000);
1898 WREG32((0x2c20 + j), 0x00000000);
1899 WREG32((0x2c24 + j), 0x00000000);
1900 }
1901
1902 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1903
1904 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1905
1906 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1907 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1908
1909 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1910 rdev->config.cik.mem_max_burst_length_bytes = 256;
1911 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1912 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1913 if (rdev->config.cik.mem_row_size_in_kb > 4)
1914 rdev->config.cik.mem_row_size_in_kb = 4;
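	/* e.g. NOOFCOLS = 2: 4 * (1 << 10) / 1024 = 4 KB, and the clamp
	 * above caps the row size at 4 KB */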
1915 /* XXX use MC settings? */
1916 rdev->config.cik.shader_engine_tile_size = 32;
1917 rdev->config.cik.num_gpus = 1;
1918 rdev->config.cik.multi_gpu_tile_size = 64;
1919
1920 /* fix up row size */
1921 gb_addr_config &= ~ROW_SIZE_MASK;
1922 switch (rdev->config.cik.mem_row_size_in_kb) {
1923 case 1:
1924 default:
1925 gb_addr_config |= ROW_SIZE(0);
1926 break;
1927 case 2:
1928 gb_addr_config |= ROW_SIZE(1);
1929 break;
1930 case 4:
1931 gb_addr_config |= ROW_SIZE(2);
1932 break;
1933 }
1934
1935 /* setup tiling info dword. gb_addr_config is not adequate since it does
1936 * not have bank info, so create a custom tiling dword.
1937 * bits 3:0 num_pipes
1938 * bits 7:4 num_banks
1939 * bits 11:8 group_size
1940 * bits 15:12 row_size
1941 */
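	/* illustrative example (field values assumed, not read from hw):
	 * 4 tile pipes -> bits 3:0 = 2, a non-zero NOOFBANK field -> bit 4 = 1,
	 * then the pipe interleave and row size fields are copied in from
	 * gb_addr_config below */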
1942 rdev->config.cik.tile_config = 0;
1943 switch (rdev->config.cik.num_tile_pipes) {
1944 case 1:
1945 rdev->config.cik.tile_config |= (0 << 0);
1946 break;
1947 case 2:
1948 rdev->config.cik.tile_config |= (1 << 0);
1949 break;
1950 case 4:
1951 rdev->config.cik.tile_config |= (2 << 0);
1952 break;
1953 case 8:
1954 default:
1955 /* XXX what about 12? */
1956 rdev->config.cik.tile_config |= (3 << 0);
1957 break;
1958 }
1959 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1960 rdev->config.cik.tile_config |= 1 << 4;
1961 else
1962 rdev->config.cik.tile_config |= 0 << 4;
1963 rdev->config.cik.tile_config |=
1964 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1965 rdev->config.cik.tile_config |=
1966 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1967
1968 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1969 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1970 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1971	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1972	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1973	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1974	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1975	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1976
1977 cik_tiling_mode_table_init(rdev);
1978
1979 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1980 rdev->config.cik.max_sh_per_se,
1981 rdev->config.cik.max_backends_per_se);
1982
1983 /* set HW defaults for 3D engine */
1984 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1985
1986 WREG32(SX_DEBUG_1, 0x20);
1987
1988 WREG32(TA_CNTL_AUX, 0x00010000);
1989
1990 tmp = RREG32(SPI_CONFIG_CNTL);
1991 tmp |= 0x03000000;
1992 WREG32(SPI_CONFIG_CNTL, tmp);
1993
1994 WREG32(SQ_CONFIG, 1);
1995
1996 WREG32(DB_DEBUG, 0);
1997
1998 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
1999 tmp |= 0x00000400;
2000 WREG32(DB_DEBUG2, tmp);
2001
2002 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2003 tmp |= 0x00020200;
2004 WREG32(DB_DEBUG3, tmp);
2005
2006 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2007 tmp |= 0x00018208;
2008 WREG32(CB_HW_CONTROL, tmp);
2009
2010 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2011
2012 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2013 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2014 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2015 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2016
2017 WREG32(VGT_NUM_INSTANCES, 1);
2018
2019 WREG32(CP_PERFMON_CNTL, 0);
2020
2021 WREG32(SQ_CONFIG, 0);
2022
2023 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2024 FORCE_EOV_MAX_REZ_CNT(255)));
2025
2026 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2027 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2028
2029 WREG32(VGT_GS_VERTEX_REUSE, 16);
2030 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2031
2032 tmp = RREG32(HDP_MISC_CNTL);
2033 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2034 WREG32(HDP_MISC_CNTL, tmp);
2035
2036 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2037 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2038
2039 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2040 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2041
2042 udelay(50);
2043}
2044
2045/*
2046 * GPU scratch register helper functions.
2047 */
2048/**
2049 * cik_scratch_init - setup driver info for CP scratch regs
2050 *
2051 * @rdev: radeon_device pointer
2052 *
2053 * Set up the number and offset of the CP scratch registers.
2054 * NOTE: use of CP scratch registers is a legacy interface and
2055 * is not used by default on newer asics (r6xx+). On newer asics,
2056 * memory buffers are used for fences rather than scratch regs.
2057 */
2058static void cik_scratch_init(struct radeon_device *rdev)
2059{
2060 int i;
2061
2062 rdev->scratch.num_reg = 7;
2063 rdev->scratch.reg_base = SCRATCH_REG0;
2064 for (i = 0; i < rdev->scratch.num_reg; i++) {
2065 rdev->scratch.free[i] = true;
2066 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2067 }
2068}
2069
2070/**
2071 * cik_ring_test - basic gfx ring test
2072 *
2073 * @rdev: radeon_device pointer
2074 * @ring: radeon_ring structure holding ring information
2075 *
2076 * Allocate a scratch register and write to it using the gfx ring (CIK).
2077 * Provides a basic gfx ring test to verify that the ring is working.
2078 * Used by cik_cp_gfx_resume();
2079 * Returns 0 on success, error on failure.
2080 */
2081int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2082{
2083 uint32_t scratch;
2084 uint32_t tmp = 0;
2085 unsigned i;
2086 int r;
2087
2088 r = radeon_scratch_get(rdev, &scratch);
2089 if (r) {
2090 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2091 return r;
2092 }
2093 WREG32(scratch, 0xCAFEDEAD);
2094 r = radeon_ring_lock(rdev, ring, 3);
2095 if (r) {
2096 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2097 radeon_scratch_free(rdev, scratch);
2098 return r;
2099 }
2100 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2101 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2102 radeon_ring_write(ring, 0xDEADBEEF);
2103 radeon_ring_unlock_commit(rdev, ring);
2104
2105	for (i = 0; i < rdev->usec_timeout; i++) {
2106 tmp = RREG32(scratch);
2107 if (tmp == 0xDEADBEEF)
2108 break;
2109 DRM_UDELAY(1);
2110 }
2111 if (i < rdev->usec_timeout) {
2112 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2113 } else {
2114 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2115 ring->idx, scratch, tmp);
2116 r = -EINVAL;
2117 }
2118 radeon_scratch_free(rdev, scratch);
2119 return r;
2120}
2121
2122/**
2123 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2124 *
2125 * @rdev: radeon_device pointer
2126 * @fence: radeon fence object
2127 *
2128 * Emits a fence sequence number on the gfx ring and flushes
2129 * GPU caches.
2130 */
2131void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2132 struct radeon_fence *fence)
2133{
2134 struct radeon_ring *ring = &rdev->ring[fence->ring];
2135 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2136
2137 /* EVENT_WRITE_EOP - flush caches, send int */
2138 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2139 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2140 EOP_TC_ACTION_EN |
2141 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2142 EVENT_INDEX(5)));
2143 radeon_ring_write(ring, addr & 0xfffffffc);
2144 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2145 radeon_ring_write(ring, fence->seq);
2146 radeon_ring_write(ring, 0);
2147 /* HDP flush */
2148 /* We should be using the new WAIT_REG_MEM special op packet here
2149 * but it causes the CP to hang
2150 */
2151 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2152 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2153 WRITE_DATA_DST_SEL(0)));
2154 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2155 radeon_ring_write(ring, 0);
2156 radeon_ring_write(ring, 0);
2157}
2158
2159/**
2160 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2161 *
2162 * @rdev: radeon_device pointer
2163 * @fence: radeon fence object
2164 *
2165 * Emits a fence sequence number on the compute ring and flushes
2166 * GPU caches.
2167 */
2168void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2169 struct radeon_fence *fence)
2170{
2171 struct radeon_ring *ring = &rdev->ring[fence->ring];
2172 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2173
2174 /* RELEASE_MEM - flush caches, send int */
2175 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2176 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2177 EOP_TC_ACTION_EN |
2178 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2179 EVENT_INDEX(5)));
2180 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2181 radeon_ring_write(ring, addr & 0xfffffffc);
2182 radeon_ring_write(ring, upper_32_bits(addr));
2183 radeon_ring_write(ring, fence->seq);
2184 radeon_ring_write(ring, 0);
2185 /* HDP flush */
2186 /* We should be using the new WAIT_REG_MEM special op packet here
2187 * but it causes the CP to hang
2188 */
2189 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2190 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2191 WRITE_DATA_DST_SEL(0)));
2192 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2193 radeon_ring_write(ring, 0);
2194 radeon_ring_write(ring, 0);
2195}
2196
2197void cik_semaphore_ring_emit(struct radeon_device *rdev,
2198 struct radeon_ring *ring,
2199 struct radeon_semaphore *semaphore,
2200 bool emit_wait)
2201{
2202 uint64_t addr = semaphore->gpu_addr;
2203 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2204
2205 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2206 radeon_ring_write(ring, addr & 0xffffffff);
2207 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2208}
2209
2210/*
2211 * IB stuff
2212 */
2213/**
2214 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2215 *
2216 * @rdev: radeon_device pointer
2217 * @ib: radeon indirect buffer object
2218 *
2219 * Emits a DE (drawing engine) or CE (constant engine) IB
2220 * on the gfx ring. IBs are usually generated by userspace
2221 * acceleration drivers and submitted to the kernel for
2222 * scheduling on the ring. This function schedules the IB
2223 * on the gfx ring for execution by the GPU.
2224 */
2225void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2226{
2227 struct radeon_ring *ring = &rdev->ring[ib->ring];
2228 u32 header, control = INDIRECT_BUFFER_VALID;
2229
2230 if (ib->is_const_ib) {
2231 /* set switch buffer packet before const IB */
2232 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2233 radeon_ring_write(ring, 0);
2234
2235 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2236 } else {
2237 u32 next_rptr;
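		/* next_rptr marks where the CP read pointer will be once this
		 * bookkeeping write (3 or 5 dwords) and the 4-dword IB packet
		 * emitted below have been consumed */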
2238 if (ring->rptr_save_reg) {
2239 next_rptr = ring->wptr + 3 + 4;
2240 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2241 radeon_ring_write(ring, ((ring->rptr_save_reg -
2242 PACKET3_SET_UCONFIG_REG_START) >> 2));
2243 radeon_ring_write(ring, next_rptr);
2244 } else if (rdev->wb.enabled) {
2245 next_rptr = ring->wptr + 5 + 4;
2246 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2247 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2248 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2249 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2250 radeon_ring_write(ring, next_rptr);
2251 }
2252
2253 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2254 }
2255
2256 control |= ib->length_dw |
2257 (ib->vm ? (ib->vm->id << 24) : 0);
2258
2259 radeon_ring_write(ring, header);
2260 radeon_ring_write(ring,
2261#ifdef __BIG_ENDIAN
2262 (2 << 0) |
2263#endif
2264 (ib->gpu_addr & 0xFFFFFFFC));
2265 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2266 radeon_ring_write(ring, control);
2267}
2268
2269/**
2270 * cik_ib_test - basic gfx ring IB test
2271 *
2272 * @rdev: radeon_device pointer
2273 * @ring: radeon_ring structure holding ring information
2274 *
2275 * Allocate an IB and execute it on the gfx ring (CIK).
2276 * Provides a basic gfx ring test to verify that IBs are working.
2277 * Returns 0 on success, error on failure.
2278 */
2279int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2280{
2281 struct radeon_ib ib;
2282 uint32_t scratch;
2283 uint32_t tmp = 0;
2284 unsigned i;
2285 int r;
2286
2287 r = radeon_scratch_get(rdev, &scratch);
2288 if (r) {
2289 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2290 return r;
2291 }
2292 WREG32(scratch, 0xCAFEDEAD);
2293 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2294 if (r) {
2295 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2296 return r;
2297 }
2298 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2299 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2300 ib.ptr[2] = 0xDEADBEEF;
2301 ib.length_dw = 3;
2302 r = radeon_ib_schedule(rdev, &ib, NULL);
2303 if (r) {
2304 radeon_scratch_free(rdev, scratch);
2305 radeon_ib_free(rdev, &ib);
2306 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2307 return r;
2308 }
2309 r = radeon_fence_wait(ib.fence, false);
2310 if (r) {
2311 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2312 return r;
2313 }
2314 for (i = 0; i < rdev->usec_timeout; i++) {
2315 tmp = RREG32(scratch);
2316 if (tmp == 0xDEADBEEF)
2317 break;
2318 DRM_UDELAY(1);
2319 }
2320 if (i < rdev->usec_timeout) {
2321 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2322 } else {
2323 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2324 scratch, tmp);
2325 r = -EINVAL;
2326 }
2327 radeon_scratch_free(rdev, scratch);
2328 radeon_ib_free(rdev, &ib);
2329 return r;
2330}
2331
2332/*
2333 * CP.
2334 * On CIK, gfx and compute now have independent command processors.
2335 *
2336 * GFX
2337 * Gfx consists of a single ring and can process both gfx jobs and
2338 * compute jobs. The gfx CP consists of three microengines (ME):
2339 * PFP - Pre-Fetch Parser
2340 * ME - Micro Engine
2341 * CE - Constant Engine
2342 * The PFP and ME make up what is considered the Drawing Engine (DE).
2343 * The CE is an asynchronous engine used for updating buffer descriptors
2344 * used by the DE so that they can be loaded into cache in parallel
2345 * while the DE is processing state update packets.
2346 *
2347 * Compute
2348 * The compute CP consists of two microengines (ME):
2349 * MEC1 - Compute MicroEngine 1
2350 * MEC2 - Compute MicroEngine 2
2351 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2352 * The queues are exposed to userspace and are programmed directly
2353 * by the compute runtime.
2354 */
2355/**
2356 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2357 *
2358 * @rdev: radeon_device pointer
2359 * @enable: enable or disable the MEs
2360 *
2361 * Halts or unhalts the gfx MEs.
2362 */
2363static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2364{
2365 if (enable)
2366 WREG32(CP_ME_CNTL, 0);
2367 else {
2368 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2369 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2370 }
2371 udelay(50);
2372}
2373
2374/**
2375 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2376 *
2377 * @rdev: radeon_device pointer
2378 *
2379 * Loads the gfx PFP, ME, and CE ucode.
2380 * Returns 0 for success, -EINVAL if the ucode is not available.
2381 */
2382static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2383{
2384 const __be32 *fw_data;
2385 int i;
2386
2387 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2388 return -EINVAL;
2389
2390 cik_cp_gfx_enable(rdev, false);
2391
2392 /* PFP */
2393 fw_data = (const __be32 *)rdev->pfp_fw->data;
2394 WREG32(CP_PFP_UCODE_ADDR, 0);
2395 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2396 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2397 WREG32(CP_PFP_UCODE_ADDR, 0);
2398
2399 /* CE */
2400 fw_data = (const __be32 *)rdev->ce_fw->data;
2401 WREG32(CP_CE_UCODE_ADDR, 0);
2402 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2403 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2404 WREG32(CP_CE_UCODE_ADDR, 0);
2405
2406 /* ME */
2407 fw_data = (const __be32 *)rdev->me_fw->data;
2408 WREG32(CP_ME_RAM_WADDR, 0);
2409 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2410 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2411 WREG32(CP_ME_RAM_WADDR, 0);
2412
2413 WREG32(CP_PFP_UCODE_ADDR, 0);
2414 WREG32(CP_CE_UCODE_ADDR, 0);
2415 WREG32(CP_ME_RAM_WADDR, 0);
2416 WREG32(CP_ME_RAM_RADDR, 0);
2417 return 0;
2418}
2419
2420/**
2421 * cik_cp_gfx_start - start the gfx ring
2422 *
2423 * @rdev: radeon_device pointer
2424 *
2425 * Enables the ring and loads the clear state context and other
2426 * packets required to init the ring.
2427 * Returns 0 for success, error for failure.
2428 */
2429static int cik_cp_gfx_start(struct radeon_device *rdev)
2430{
2431 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2432 int r, i;
2433
2434 /* init the CP */
2435 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2436 WREG32(CP_ENDIAN_SWAP, 0);
2437 WREG32(CP_DEVICE_ID, 1);
2438
2439 cik_cp_gfx_enable(rdev, true);
2440
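	/* 17 dwords beyond the clear state: SET_BASE (4) + two PREAMBLE_CNTL
	 * (2 each) + CONTEXT_CONTROL (3) + CLEAR_STATE (2) + SET_CONTEXT_REG (4),
	 * all emitted below */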
2441 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2442 if (r) {
2443 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2444 return r;
2445 }
2446
2447 /* init the CE partitions. CE only used for gfx on CIK */
2448 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2449 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2450 radeon_ring_write(ring, 0xc000);
2451 radeon_ring_write(ring, 0xc000);
2452
2453 /* setup clear context state */
2454 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2455 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2456
2457 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2458 radeon_ring_write(ring, 0x80000000);
2459 radeon_ring_write(ring, 0x80000000);
2460
2461 for (i = 0; i < cik_default_size; i++)
2462 radeon_ring_write(ring, cik_default_state[i]);
2463
2464 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2465 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2466
2467 /* set clear context state */
2468 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2469 radeon_ring_write(ring, 0);
2470
2471 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2472 radeon_ring_write(ring, 0x00000316);
2473 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2474 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2475
2476 radeon_ring_unlock_commit(rdev, ring);
2477
2478 return 0;
2479}
2480
2481/**
2482 * cik_cp_gfx_fini - stop the gfx ring
2483 *
2484 * @rdev: radeon_device pointer
2485 *
2486 * Stop the gfx ring and tear down the driver ring
2487 * info.
2488 */
2489static void cik_cp_gfx_fini(struct radeon_device *rdev)
2490{
2491 cik_cp_gfx_enable(rdev, false);
2492 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2493}
2494
2495/**
2496 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2497 *
2498 * @rdev: radeon_device pointer
2499 *
2500 * Program the location and size of the gfx ring buffer
2501 * and test it to make sure it's working.
2502 * Returns 0 for success, error for failure.
2503 */
2504static int cik_cp_gfx_resume(struct radeon_device *rdev)
2505{
2506 struct radeon_ring *ring;
2507 u32 tmp;
2508 u32 rb_bufsz;
2509 u64 rb_addr;
2510 int r;
2511
2512 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2513 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2514
2515 /* Set the write pointer delay */
2516 WREG32(CP_RB_WPTR_DELAY, 0);
2517
2518 /* set the RB to use vmid 0 */
2519 WREG32(CP_RB_VMID, 0);
2520
2521 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2522
2523 /* ring 0 - compute and gfx */
2524 /* Set ring buffer size */
2525 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2526 rb_bufsz = drm_order(ring->ring_size / 8);
2527 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2528#ifdef __BIG_ENDIAN
2529 tmp |= BUF_SWAP_32BIT;
2530#endif
2531 WREG32(CP_RB0_CNTL, tmp);
2532
2533 /* Initialize the ring buffer's read and write pointers */
2534 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2535 ring->wptr = 0;
2536 WREG32(CP_RB0_WPTR, ring->wptr);
2537
2538	/* set the wb address whether it's enabled or not */
2539 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2540 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2541
2542 /* scratch register shadowing is no longer supported */
2543 WREG32(SCRATCH_UMSK, 0);
2544
2545 if (!rdev->wb.enabled)
2546 tmp |= RB_NO_UPDATE;
2547
2548 mdelay(1);
2549 WREG32(CP_RB0_CNTL, tmp);
2550
2551 rb_addr = ring->gpu_addr >> 8;
2552 WREG32(CP_RB0_BASE, rb_addr);
2553 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2554
2555 ring->rptr = RREG32(CP_RB0_RPTR);
2556
2557 /* start the ring */
2558 cik_cp_gfx_start(rdev);
2559 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2560 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2561 if (r) {
2562 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2563 return r;
2564 }
2565 return 0;
2566}
2567
2568u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2569 struct radeon_ring *ring)
2570{
2571 u32 rptr;
2572
2573
2574
2575 if (rdev->wb.enabled) {
2576 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2577 } else {
2578		mutex_lock(&rdev->srbm_mutex);
2579		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2580		rptr = RREG32(CP_HQD_PQ_RPTR);
2581		cik_srbm_select(rdev, 0, 0, 0, 0);
2582		mutex_unlock(&rdev->srbm_mutex);
2583	}
2584 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2585
2586 return rptr;
2587}
2588
2589u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2590 struct radeon_ring *ring)
2591{
2592 u32 wptr;
2593
2594 if (rdev->wb.enabled) {
2595 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2596 } else {
2597		mutex_lock(&rdev->srbm_mutex);
2598		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2599		wptr = RREG32(CP_HQD_PQ_WPTR);
2600		cik_srbm_select(rdev, 0, 0, 0, 0);
2601		mutex_unlock(&rdev->srbm_mutex);
2602	}
2603 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2604
2605 return wptr;
2606}
2607
2608void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2609 struct radeon_ring *ring)
2610{
2611 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2612
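	/* mirror the new write pointer into the writeback slot, then ring
	 * this queue's doorbell */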
2613 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2614 WDOORBELL32(ring->doorbell_offset, wptr);
2615}
2616
2617/**
2618 * cik_cp_compute_enable - enable/disable the compute CP MEs
2619 *
2620 * @rdev: radeon_device pointer
2621 * @enable: enable or disable the MEs
2622 *
2623 * Halts or unhalts the compute MEs.
2624 */
2625static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2626{
2627 if (enable)
2628 WREG32(CP_MEC_CNTL, 0);
2629 else
2630 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2631 udelay(50);
2632}
2633
2634/**
2635 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2636 *
2637 * @rdev: radeon_device pointer
2638 *
2639 * Loads the compute MEC1&2 ucode.
2640 * Returns 0 for success, -EINVAL if the ucode is not available.
2641 */
2642static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2643{
2644 const __be32 *fw_data;
2645 int i;
2646
2647 if (!rdev->mec_fw)
2648 return -EINVAL;
2649
2650 cik_cp_compute_enable(rdev, false);
2651
2652 /* MEC1 */
2653 fw_data = (const __be32 *)rdev->mec_fw->data;
2654 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2655 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2656 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2657 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2658
2659 if (rdev->family == CHIP_KAVERI) {
2660 /* MEC2 */
2661 fw_data = (const __be32 *)rdev->mec_fw->data;
2662 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2663 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2664 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2665 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2666 }
2667
2668 return 0;
2669}
2670
2671/**
2672 * cik_cp_compute_start - start the compute queues
2673 *
2674 * @rdev: radeon_device pointer
2675 *
2676 * Enable the compute queues.
2677 * Returns 0 for success, error for failure.
2678 */
2679static int cik_cp_compute_start(struct radeon_device *rdev)
2680{
2681	cik_cp_compute_enable(rdev, true);
2682
2683	return 0;
2684}
2685
2686/**
2687 * cik_cp_compute_fini - stop the compute queues
2688 *
2689 * @rdev: radeon_device pointer
2690 *
2691 * Stop the compute queues and tear down the driver queue
2692 * info.
2693 */
2694static void cik_cp_compute_fini(struct radeon_device *rdev)
2695{
2696	int i, idx, r;
2697
2698	cik_cp_compute_enable(rdev, false);
2699
2700 for (i = 0; i < 2; i++) {
2701 if (i == 0)
2702 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2703 else
2704 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2705
2706 if (rdev->ring[idx].mqd_obj) {
2707 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2708 if (unlikely(r != 0))
2709 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2710
2711 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2712 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2713
2714 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2715 rdev->ring[idx].mqd_obj = NULL;
2716 }
2717 }
Alex Deucher841cf442012-12-18 21:47:44 -05002718}
2719
2720static void cik_mec_fini(struct radeon_device *rdev)
2721{
2722 int r;
2723
2724 if (rdev->mec.hpd_eop_obj) {
2725 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2726 if (unlikely(r != 0))
2727 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2728 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2729 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2730
2731 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2732 rdev->mec.hpd_eop_obj = NULL;
2733 }
2734}
2735
2736#define MEC_HPD_SIZE 2048
2737
2738static int cik_mec_init(struct radeon_device *rdev)
2739{
2740 int r;
2741 u32 *hpd;
2742
2743 /*
2744 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2745 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2746 */
2747 if (rdev->family == CHIP_KAVERI)
2748 rdev->mec.num_mec = 2;
2749 else
2750 rdev->mec.num_mec = 1;
2751 rdev->mec.num_pipe = 4;
2752 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2753
2754 if (rdev->mec.hpd_eop_obj == NULL) {
2755 r = radeon_bo_create(rdev,
2756				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2757 PAGE_SIZE, true,
2758 RADEON_GEM_DOMAIN_GTT, NULL,
2759 &rdev->mec.hpd_eop_obj);
2760 if (r) {
2761			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2762 return r;
2763 }
2764 }
2765
2766 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2767 if (unlikely(r != 0)) {
2768 cik_mec_fini(rdev);
2769 return r;
2770 }
2771 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2772 &rdev->mec.hpd_eop_gpu_addr);
2773 if (r) {
2774		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2775 cik_mec_fini(rdev);
2776 return r;
2777 }
2778 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2779 if (r) {
2780		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2781 cik_mec_fini(rdev);
2782 return r;
2783 }
2784
2785 /* clear memory. Not sure if this is required or not */
2786	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2787
2788 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2789 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2790
2791 return 0;
2792}
2793
2794struct hqd_registers
2795{
2796 u32 cp_mqd_base_addr;
2797 u32 cp_mqd_base_addr_hi;
2798 u32 cp_hqd_active;
2799 u32 cp_hqd_vmid;
2800 u32 cp_hqd_persistent_state;
2801 u32 cp_hqd_pipe_priority;
2802 u32 cp_hqd_queue_priority;
2803 u32 cp_hqd_quantum;
2804 u32 cp_hqd_pq_base;
2805 u32 cp_hqd_pq_base_hi;
2806 u32 cp_hqd_pq_rptr;
2807 u32 cp_hqd_pq_rptr_report_addr;
2808 u32 cp_hqd_pq_rptr_report_addr_hi;
2809 u32 cp_hqd_pq_wptr_poll_addr;
2810 u32 cp_hqd_pq_wptr_poll_addr_hi;
2811 u32 cp_hqd_pq_doorbell_control;
2812 u32 cp_hqd_pq_wptr;
2813 u32 cp_hqd_pq_control;
2814 u32 cp_hqd_ib_base_addr;
2815 u32 cp_hqd_ib_base_addr_hi;
2816 u32 cp_hqd_ib_rptr;
2817 u32 cp_hqd_ib_control;
2818 u32 cp_hqd_iq_timer;
2819 u32 cp_hqd_iq_rptr;
2820 u32 cp_hqd_dequeue_request;
2821 u32 cp_hqd_dma_offload;
2822 u32 cp_hqd_sema_cmd;
2823 u32 cp_hqd_msg_type;
2824 u32 cp_hqd_atomic0_preop_lo;
2825 u32 cp_hqd_atomic0_preop_hi;
2826 u32 cp_hqd_atomic1_preop_lo;
2827 u32 cp_hqd_atomic1_preop_hi;
2828 u32 cp_hqd_hq_scheduler0;
2829 u32 cp_hqd_hq_scheduler1;
2830 u32 cp_mqd_control;
2831};
2832
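/* bonaire_mqd - memory queue descriptor (MQD) for a compute queue.
 * cik_cp_compute_resume() below fills the header, the static thread
 * management masks and queue_state, and mirrors queue_state into the
 * CP_HQD_ / CP_MQD_ registers; the remaining fields are left zeroed.
 * Field meanings beyond what that function programs are assumptions
 * not spelled out in this file.
 */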
2833struct bonaire_mqd
2834{
2835 u32 header;
2836 u32 dispatch_initiator;
2837 u32 dimensions[3];
2838 u32 start_idx[3];
2839 u32 num_threads[3];
2840 u32 pipeline_stat_enable;
2841 u32 perf_counter_enable;
2842 u32 pgm[2];
2843 u32 tba[2];
2844 u32 tma[2];
2845 u32 pgm_rsrc[2];
2846 u32 vmid;
2847 u32 resource_limits;
2848 u32 static_thread_mgmt01[2];
2849 u32 tmp_ring_size;
2850 u32 static_thread_mgmt23[2];
2851 u32 restart[3];
2852 u32 thread_trace_enable;
2853 u32 reserved1;
2854 u32 user_data[16];
2855 u32 vgtcs_invoke_count[2];
2856 struct hqd_registers queue_state;
2857 u32 dequeue_cntr;
2858 u32 interrupt_queue[64];
2859};
2860
2861/**
2862 * cik_cp_compute_resume - setup the compute queue registers
2863 *
2864 * @rdev: radeon_device pointer
2865 *
2866 * Program the compute queues and test them to make sure they
2867 * are working.
2868 * Returns 0 for success, error for failure.
2869 */
2870static int cik_cp_compute_resume(struct radeon_device *rdev)
2871{
2872	int r, i, idx;
2873 u32 tmp;
2874 bool use_doorbell = true;
2875 u64 hqd_gpu_addr;
2876 u64 mqd_gpu_addr;
2877 u64 eop_gpu_addr;
2878 u64 wb_gpu_addr;
2879 u32 *buf;
2880 struct bonaire_mqd *mqd;
2881
2882	r = cik_cp_compute_start(rdev);
2883	if (r)
2884		return r;
2885
2886 /* fix up chicken bits */
2887 tmp = RREG32(CP_CPF_DEBUG);
2888 tmp |= (1 << 23);
2889 WREG32(CP_CPF_DEBUG, tmp);
2890
2891 /* init the pipes */
2892	mutex_lock(&rdev->srbm_mutex);
2893	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2894 int me = (i < 4) ? 1 : 2;
2895 int pipe = (i < 4) ? i : (i - 4);
2896
2897 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2898
2899 cik_srbm_select(rdev, me, pipe, 0, 0);
2900
2901 /* write the EOP addr */
2902 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2903 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2904
2905 /* set the VMID assigned */
2906 WREG32(CP_HPD_EOP_VMID, 0);
2907
2908 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
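		/* with MEC_HPD_SIZE = 2048: 2048 / 8 = 256 dwords,
		 * drm_order(256) = 8, so 2^(8+1) = 512 dwords = 2048 bytes */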
2909 tmp = RREG32(CP_HPD_EOP_CONTROL);
2910 tmp &= ~EOP_SIZE_MASK;
2911 tmp |= drm_order(MEC_HPD_SIZE / 8);
2912 WREG32(CP_HPD_EOP_CONTROL, tmp);
2913 }
2914 cik_srbm_select(rdev, 0, 0, 0, 0);
2915	mutex_unlock(&rdev->srbm_mutex);
2916
2917 /* init the queues. Just two for now. */
2918 for (i = 0; i < 2; i++) {
2919 if (i == 0)
2920 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2921 else
2922 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2923
2924 if (rdev->ring[idx].mqd_obj == NULL) {
2925 r = radeon_bo_create(rdev,
2926 sizeof(struct bonaire_mqd),
2927 PAGE_SIZE, true,
2928 RADEON_GEM_DOMAIN_GTT, NULL,
2929 &rdev->ring[idx].mqd_obj);
2930 if (r) {
2931 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2932 return r;
2933 }
2934 }
2935
2936 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2937 if (unlikely(r != 0)) {
2938 cik_cp_compute_fini(rdev);
2939 return r;
2940 }
2941 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2942 &mqd_gpu_addr);
2943 if (r) {
2944 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2945 cik_cp_compute_fini(rdev);
2946 return r;
2947 }
2948 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2949 if (r) {
2950 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2951 cik_cp_compute_fini(rdev);
2952 return r;
2953 }
2954
2955 /* doorbell offset */
2956 rdev->ring[idx].doorbell_offset =
2957 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2958
2959 /* init the mqd struct */
2960 memset(buf, 0, sizeof(struct bonaire_mqd));
2961
2962 mqd = (struct bonaire_mqd *)buf;
2963 mqd->header = 0xC0310800;
2964 mqd->static_thread_mgmt01[0] = 0xffffffff;
2965 mqd->static_thread_mgmt01[1] = 0xffffffff;
2966 mqd->static_thread_mgmt23[0] = 0xffffffff;
2967 mqd->static_thread_mgmt23[1] = 0xffffffff;
2968
2969		mutex_lock(&rdev->srbm_mutex);
2970		cik_srbm_select(rdev, rdev->ring[idx].me,
2971 rdev->ring[idx].pipe,
2972 rdev->ring[idx].queue, 0);
2973
2974 /* disable wptr polling */
2975 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2976 tmp &= ~WPTR_POLL_EN;
2977 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2978
2979 /* enable doorbell? */
2980 mqd->queue_state.cp_hqd_pq_doorbell_control =
2981 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2982 if (use_doorbell)
2983 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2984 else
2985 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2986 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2987 mqd->queue_state.cp_hqd_pq_doorbell_control);
2988
2989 /* disable the queue if it's active */
2990 mqd->queue_state.cp_hqd_dequeue_request = 0;
2991 mqd->queue_state.cp_hqd_pq_rptr = 0;
2992		mqd->queue_state.cp_hqd_pq_wptr = 0;
2993 if (RREG32(CP_HQD_ACTIVE) & 1) {
2994 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
2995 for (i = 0; i < rdev->usec_timeout; i++) {
2996 if (!(RREG32(CP_HQD_ACTIVE) & 1))
2997 break;
2998 udelay(1);
2999 }
3000 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3001 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3002 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3003 }
3004
3005 /* set the pointer to the MQD */
3006 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3007 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3008 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3009 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3010 /* set MQD vmid to 0 */
3011 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3012 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3013 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3014
3015		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3016 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3017 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3018 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3019 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3020 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3021
3022 /* set up the HQD, this is similar to CP_RB0_CNTL */
3023 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3024 mqd->queue_state.cp_hqd_pq_control &=
3025 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3026
3027 mqd->queue_state.cp_hqd_pq_control |=
3028 drm_order(rdev->ring[idx].ring_size / 8);
3029 mqd->queue_state.cp_hqd_pq_control |=
3030 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3031#ifdef __BIG_ENDIAN
3032 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3033#endif
3034 mqd->queue_state.cp_hqd_pq_control &=
3035 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3036 mqd->queue_state.cp_hqd_pq_control |=
3037 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3038 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3039
3040 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3041 if (i == 0)
3042 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3043 else
3044 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3045 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3046 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3047 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3048 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3049 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3050
3051		/* set the wb address whether it's enabled or not */
3052 if (i == 0)
3053 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3054 else
3055 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3056 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3057 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3058 upper_32_bits(wb_gpu_addr) & 0xffff;
3059 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3060 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3061 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3062 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3063
3064 /* enable the doorbell if requested */
3065 if (use_doorbell) {
3066 mqd->queue_state.cp_hqd_pq_doorbell_control =
3067 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3068 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3069 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3070 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3071 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3072 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3073 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3074
3075 } else {
3076 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3077 }
3078 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3079 mqd->queue_state.cp_hqd_pq_doorbell_control);
3080
3081 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3082 rdev->ring[idx].wptr = 0;
3083 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3084 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3085 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3086 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3087
3088 /* set the vmid for the queue */
3089 mqd->queue_state.cp_hqd_vmid = 0;
3090 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3091
3092 /* activate the queue */
3093 mqd->queue_state.cp_hqd_active = 1;
3094 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3095
3096 cik_srbm_select(rdev, 0, 0, 0, 0);
3097		mutex_unlock(&rdev->srbm_mutex);
3098
3099 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3100 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3101
3102 rdev->ring[idx].ready = true;
3103 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3104 if (r)
3105 rdev->ring[idx].ready = false;
3106 }
3107
3108	return 0;
3109}
3110
3111static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3112{
3113 cik_cp_gfx_enable(rdev, enable);
3114 cik_cp_compute_enable(rdev, enable);
3115}
3116
3117static int cik_cp_load_microcode(struct radeon_device *rdev)
3118{
3119 int r;
3120
3121 r = cik_cp_gfx_load_microcode(rdev);
3122 if (r)
3123 return r;
3124 r = cik_cp_compute_load_microcode(rdev);
3125 if (r)
3126 return r;
3127
3128 return 0;
3129}
3130
3131static void cik_cp_fini(struct radeon_device *rdev)
3132{
3133 cik_cp_gfx_fini(rdev);
3134 cik_cp_compute_fini(rdev);
3135}
3136
3137static int cik_cp_resume(struct radeon_device *rdev)
3138{
3139 int r;
3140
3141 /* Reset all cp blocks */
3142 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3143 RREG32(GRBM_SOFT_RESET);
3144 mdelay(15);
3145 WREG32(GRBM_SOFT_RESET, 0);
3146 RREG32(GRBM_SOFT_RESET);
3147
3148 r = cik_cp_load_microcode(rdev);
3149 if (r)
3150 return r;
3151
3152 r = cik_cp_gfx_resume(rdev);
3153 if (r)
3154 return r;
3155 r = cik_cp_compute_resume(rdev);
3156 if (r)
3157 return r;
3158
3159 return 0;
3160}
3161
3162/*
3163 * sDMA - System DMA
3164 * Starting with CIK, the GPU has new asynchronous
3165 * DMA engines. These engines are used for compute
3166 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3167 * and each one supports 1 ring buffer used for gfx
3168 * and 2 queues used for compute.
3169 *
3170 * The programming model is very similar to the CP
3171 * (ring buffer, IBs, etc.), but sDMA has its own
3172 * packet format that is different from the PM4 format
3173 * used by the CP. sDMA supports copying data, writing
3174 * embedded data, solid fills, and a number of other
3175 * things. It also has support for tiling/detiling of
3176 * buffers.
3177 */
3178/**
3179 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3180 *
3181 * @rdev: radeon_device pointer
3182 * @ib: IB object to schedule
3183 *
3184 * Schedule an IB in the DMA ring (CIK).
3185 */
3186void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3187 struct radeon_ib *ib)
3188{
3189 struct radeon_ring *ring = &rdev->ring[ib->ring];
3190 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3191
3192 if (rdev->wb.enabled) {
3193 u32 next_rptr = ring->wptr + 5;
3194 while ((next_rptr & 7) != 4)
3195 next_rptr++;
3196 next_rptr += 4;
3197 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3198 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3199 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3200 radeon_ring_write(ring, 1); /* number of DWs to follow */
3201 radeon_ring_write(ring, next_rptr);
3202 }
3203
3204	/* IB packet must end on an 8 DW boundary */
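	/* the INDIRECT_BUFFER packet emitted below is 4 dwords, so padding
	 * until (wptr & 7) == 4 makes it end on an 8-dword boundary */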
3205 while ((ring->wptr & 7) != 4)
3206 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3207 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3208 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3209 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3210 radeon_ring_write(ring, ib->length_dw);
3211
3212}
3213
3214/**
3215 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3216 *
3217 * @rdev: radeon_device pointer
3218 * @fence: radeon fence object
3219 *
3220 * Add a DMA fence packet to the ring to write
3221 * the fence seq number and DMA trap packet to generate
3222 * an interrupt if needed (CIK).
3223 */
3224void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3225 struct radeon_fence *fence)
3226{
3227 struct radeon_ring *ring = &rdev->ring[fence->ring];
3228 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3229 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3230 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3231 u32 ref_and_mask;
3232
3233 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3234 ref_and_mask = SDMA0;
3235 else
3236 ref_and_mask = SDMA1;
3237
3238 /* write the fence */
3239 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3240 radeon_ring_write(ring, addr & 0xffffffff);
3241 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3242 radeon_ring_write(ring, fence->seq);
3243 /* generate an interrupt */
3244 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3245 /* flush HDP */
3246 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3247 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3248 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3249 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3250 radeon_ring_write(ring, ref_and_mask); /* MASK */
3251 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3252}
3253
3254/**
3255 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3256 *
3257 * @rdev: radeon_device pointer
3258 * @ring: radeon_ring structure holding ring information
3259 * @semaphore: radeon semaphore object
3260 * @emit_wait: wait or signal semaphore
3261 *
3262 * Add a DMA semaphore packet to the ring to wait on or signal
3263 * other rings (CIK).
3264 */
3265void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3266 struct radeon_ring *ring,
3267 struct radeon_semaphore *semaphore,
3268 bool emit_wait)
3269{
3270 u64 addr = semaphore->gpu_addr;
3271 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3272
3273 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3274 radeon_ring_write(ring, addr & 0xfffffff8);
3275 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3276}
3277
3278/**
3279 * cik_sdma_gfx_stop - stop the gfx async dma engines
3280 *
3281 * @rdev: radeon_device pointer
3282 *
3283 * Stop the gfx async dma ring buffers (CIK).
3284 */
3285static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3286{
3287 u32 rb_cntl, reg_offset;
3288 int i;
3289
3290 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3291
3292 for (i = 0; i < 2; i++) {
3293 if (i == 0)
3294 reg_offset = SDMA0_REGISTER_OFFSET;
3295 else
3296 reg_offset = SDMA1_REGISTER_OFFSET;
3297 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3298 rb_cntl &= ~SDMA_RB_ENABLE;
3299 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3300 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3301 }
3302}
3303
3304/**
3305 * cik_sdma_rlc_stop - stop the compute async dma engines
3306 *
3307 * @rdev: radeon_device pointer
3308 *
3309 * Stop the compute async dma queues (CIK).
3310 */
3311static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3312{
3313 /* XXX todo */
3314}
3315
3316/**
3317 * cik_sdma_enable - halt or unhalt the async dma engines
3318 *
3319 * @rdev: radeon_device pointer
3320 * @enable: enable/disable the DMA MEs.
3321 *
3322 * Halt or unhalt the async dma engines (CIK).
3323 */
3324static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3325{
3326 u32 me_cntl, reg_offset;
3327 int i;
3328
3329 for (i = 0; i < 2; i++) {
3330 if (i == 0)
3331 reg_offset = SDMA0_REGISTER_OFFSET;
3332 else
3333 reg_offset = SDMA1_REGISTER_OFFSET;
3334 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3335 if (enable)
3336 me_cntl &= ~SDMA_HALT;
3337 else
3338 me_cntl |= SDMA_HALT;
3339 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3340 }
3341}
3342
3343/**
3344 * cik_sdma_gfx_resume - setup and start the async dma engines
3345 *
3346 * @rdev: radeon_device pointer
3347 *
3348 * Set up the gfx DMA ring buffers and enable them (CIK).
3349 * Returns 0 for success, error for failure.
3350 */
3351static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3352{
3353 struct radeon_ring *ring;
3354 u32 rb_cntl, ib_cntl;
3355 u32 rb_bufsz;
3356 u32 reg_offset, wb_offset;
3357 int i, r;
3358
3359 for (i = 0; i < 2; i++) {
3360 if (i == 0) {
3361 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3362 reg_offset = SDMA0_REGISTER_OFFSET;
3363 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3364 } else {
3365 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3366 reg_offset = SDMA1_REGISTER_OFFSET;
3367 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3368 }
3369
3370 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3371 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3372
3373 /* Set ring buffer size in dwords */
3374 rb_bufsz = drm_order(ring->ring_size / 4);
3375		rb_cntl = rb_bufsz << 1;
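		/* ring_size is in bytes; drm_order() gives log2 of the size
		 * in DWs, which is what the RB size field (just above the
		 * enable bit, hence the << 1) expects.  E.g. a 256KB ring is
		 * 65536 DWs, so rb_bufsz = 16.
		 */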
3376#ifdef __BIG_ENDIAN
3377 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3378#endif
3379 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3380
3381 /* Initialize the ring buffer's read and write pointers */
3382 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3383 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3384
3385 /* set the wb address whether it's enabled or not */
3386 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3387 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3388 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3389 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3390
3391 if (rdev->wb.enabled)
3392 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3393
3394 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3395 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3396
3397 ring->wptr = 0;
3398 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3399
3400 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3401
3402 /* enable DMA RB */
3403 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3404
3405 ib_cntl = SDMA_IB_ENABLE;
3406#ifdef __BIG_ENDIAN
3407 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3408#endif
3409 /* enable DMA IBs */
3410 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3411
3412 ring->ready = true;
3413
3414 r = radeon_ring_test(rdev, ring->idx, ring);
3415 if (r) {
3416 ring->ready = false;
3417 return r;
3418 }
3419 }
3420
3421 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3422
3423 return 0;
3424}
3425
3426/**
3427 * cik_sdma_rlc_resume - setup and start the async dma engines
3428 *
3429 * @rdev: radeon_device pointer
3430 *
3431 * Set up the compute DMA queues and enable them (CIK).
3432 * Returns 0 for success, error for failure.
3433 */
3434static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3435{
3436 /* XXX todo */
3437 return 0;
3438}
3439
3440/**
3441 * cik_sdma_load_microcode - load the sDMA ME ucode
3442 *
3443 * @rdev: radeon_device pointer
3444 *
3445 * Loads the sDMA0/1 ucode.
3446 * Returns 0 for success, -EINVAL if the ucode is not available.
3447 */
3448static int cik_sdma_load_microcode(struct radeon_device *rdev)
3449{
3450 const __be32 *fw_data;
3451 int i;
3452
3453 if (!rdev->sdma_fw)
3454 return -EINVAL;
3455
3456 /* stop the gfx rings and rlc compute queues */
3457 cik_sdma_gfx_stop(rdev);
3458 cik_sdma_rlc_stop(rdev);
3459
3460 /* halt the MEs */
3461 cik_sdma_enable(rdev, false);
3462
3463 /* sdma0 */
3464 fw_data = (const __be32 *)rdev->sdma_fw->data;
3465 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3466 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3467 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3468 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3469
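	/* sdma1 is loaded from the same firmware image as sdma0; only the
	 * register bank (SDMA1_REGISTER_OFFSET) differs.
	 */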
3470 /* sdma1 */
3471 fw_data = (const __be32 *)rdev->sdma_fw->data;
3472 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3473 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3474 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3475 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3476
3477 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3478 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3479 return 0;
3480}
3481
3482/**
3483 * cik_sdma_resume - setup and start the async dma engines
3484 *
3485 * @rdev: radeon_device pointer
3486 *
3487 * Set up the DMA engines and enable them (CIK).
3488 * Returns 0 for success, error for failure.
3489 */
3490static int cik_sdma_resume(struct radeon_device *rdev)
3491{
3492 int r;
3493
3494 /* Reset dma */
3495 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3496 RREG32(SRBM_SOFT_RESET);
3497 udelay(50);
3498 WREG32(SRBM_SOFT_RESET, 0);
3499 RREG32(SRBM_SOFT_RESET);
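	/* the RREG32 read-backs post the writes; both engines are held in
	 * soft reset for ~50us before being released.
	 */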
3500
3501 r = cik_sdma_load_microcode(rdev);
3502 if (r)
3503 return r;
3504
3505 /* unhalt the MEs */
3506 cik_sdma_enable(rdev, true);
3507
3508 /* start the gfx rings and rlc compute queues */
3509 r = cik_sdma_gfx_resume(rdev);
3510 if (r)
3511 return r;
3512 r = cik_sdma_rlc_resume(rdev);
3513 if (r)
3514 return r;
3515
3516 return 0;
3517}
3518
3519/**
3520 * cik_sdma_fini - tear down the async dma engines
3521 *
3522 * @rdev: radeon_device pointer
3523 *
3524 * Stop the async dma engines and free the rings (CIK).
3525 */
3526static void cik_sdma_fini(struct radeon_device *rdev)
3527{
3528 /* stop the gfx rings and rlc compute queues */
3529 cik_sdma_gfx_stop(rdev);
3530 cik_sdma_rlc_stop(rdev);
3531 /* halt the MEs */
3532 cik_sdma_enable(rdev, false);
3533 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3534 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3535 /* XXX - compute dma queue tear down */
3536}
3537
3538/**
3539 * cik_copy_dma - copy pages using the DMA engine
3540 *
3541 * @rdev: radeon_device pointer
3542 * @src_offset: src GPU address
3543 * @dst_offset: dst GPU address
3544 * @num_gpu_pages: number of GPU pages to xfer
3545 * @fence: radeon fence object
3546 *
3547 * Copy GPU pages using the DMA engine (CIK).
3548 * Used by the radeon ttm implementation to move pages if
3549 * registered as the asic copy callback.
3550 */
3551int cik_copy_dma(struct radeon_device *rdev,
3552 uint64_t src_offset, uint64_t dst_offset,
3553 unsigned num_gpu_pages,
3554 struct radeon_fence **fence)
3555{
3556 struct radeon_semaphore *sem = NULL;
3557 int ring_index = rdev->asic->copy.dma_ring_index;
3558 struct radeon_ring *ring = &rdev->ring[ring_index];
3559 u32 size_in_bytes, cur_size_in_bytes;
3560 int i, num_loops;
3561 int r = 0;
3562
3563 r = radeon_semaphore_create(rdev, &sem);
3564 if (r) {
3565 DRM_ERROR("radeon: moving bo (%d).\n", r);
3566 return r;
3567 }
3568
3569 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3570 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
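	/* copies are split into chunks of at most 0x1fffff bytes, the most a
	 * single linear COPY packet carries here; each loop iteration below
	 * emits one such 7 DW packet, and the ring is locked for an extra
	 * 14 DWs of headroom for the semaphore sync and fence.
	 */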
3571 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3572 if (r) {
3573 DRM_ERROR("radeon: moving bo (%d).\n", r);
3574 radeon_semaphore_free(rdev, &sem, NULL);
3575 return r;
3576 }
3577
3578 if (radeon_fence_need_sync(*fence, ring->idx)) {
3579 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3580 ring->idx);
3581 radeon_fence_note_sync(*fence, ring->idx);
3582 } else {
3583 radeon_semaphore_free(rdev, &sem, NULL);
3584 }
3585
3586 for (i = 0; i < num_loops; i++) {
3587 cur_size_in_bytes = size_in_bytes;
3588 if (cur_size_in_bytes > 0x1fffff)
3589 cur_size_in_bytes = 0x1fffff;
3590 size_in_bytes -= cur_size_in_bytes;
3591 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3592 radeon_ring_write(ring, cur_size_in_bytes);
3593 radeon_ring_write(ring, 0); /* src/dst endian swap */
3594 radeon_ring_write(ring, src_offset & 0xffffffff);
3595 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3596 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3597 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3598 src_offset += cur_size_in_bytes;
3599 dst_offset += cur_size_in_bytes;
3600 }
3601
3602 r = radeon_fence_emit(rdev, fence, ring->idx);
3603 if (r) {
3604 radeon_ring_unlock_undo(rdev, ring);
3605 return r;
3606 }
3607
3608 radeon_ring_unlock_commit(rdev, ring);
3609 radeon_semaphore_free(rdev, &sem, *fence);
3610
3611 return r;
3612}
3613
3614/**
3615 * cik_sdma_ring_test - simple async dma engine test
3616 *
3617 * @rdev: radeon_device pointer
3618 * @ring: radeon_ring structure holding ring information
3619 *
3620 * Test the DMA engine by using it to write a value
3621 * to memory (CIK).
3622 * Returns 0 for success, error for failure.
3623 */
3624int cik_sdma_ring_test(struct radeon_device *rdev,
3625 struct radeon_ring *ring)
3626{
3627 unsigned i;
3628 int r;
3629 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3630 u32 tmp;
3631
3632 if (!ptr) {
3633 DRM_ERROR("invalid vram scratch pointer\n");
3634 return -EINVAL;
3635 }
3636
3637 tmp = 0xCAFEDEAD;
3638 writel(tmp, ptr);
3639
3640 r = radeon_ring_lock(rdev, ring, 4);
3641 if (r) {
3642 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3643 return r;
3644 }
3645 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3646 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3647 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3648 radeon_ring_write(ring, 1); /* number of DWs to follow */
3649 radeon_ring_write(ring, 0xDEADBEEF);
3650 radeon_ring_unlock_commit(rdev, ring);
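	/* the scratch location was seeded with 0xCAFEDEAD above; now poll
	 * until the SDMA write of 0xDEADBEEF lands or usec_timeout expires.
	 */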
3651
3652 for (i = 0; i < rdev->usec_timeout; i++) {
3653 tmp = readl(ptr);
3654 if (tmp == 0xDEADBEEF)
3655 break;
3656 DRM_UDELAY(1);
3657 }
3658
3659 if (i < rdev->usec_timeout) {
3660 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3661 } else {
3662 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3663 ring->idx, tmp);
3664 r = -EINVAL;
3665 }
3666 return r;
3667}
3668
3669/**
3670 * cik_sdma_ib_test - test an IB on the DMA engine
3671 *
3672 * @rdev: radeon_device pointer
3673 * @ring: radeon_ring structure holding ring information
3674 *
3675 * Test a simple IB in the DMA ring (CIK).
3676 * Returns 0 on success, error on failure.
3677 */
3678int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3679{
3680 struct radeon_ib ib;
3681 unsigned i;
3682 int r;
3683 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3684 u32 tmp = 0;
3685
3686 if (!ptr) {
3687 DRM_ERROR("invalid vram scratch pointer\n");
3688 return -EINVAL;
3689 }
3690
3691 tmp = 0xCAFEDEAD;
3692 writel(tmp, ptr);
3693
3694 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3695 if (r) {
3696 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3697 return r;
3698 }
3699
3700 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3701 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3702 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3703 ib.ptr[3] = 1;
3704 ib.ptr[4] = 0xDEADBEEF;
3705 ib.length_dw = 5;
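	/* a minimal 5 DW IB: WRITE header, destination address lo/hi,
	 * DW count and the 0xDEADBEEF payload, mirroring the ring test.
	 */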
3706
3707 r = radeon_ib_schedule(rdev, &ib, NULL);
3708 if (r) {
3709 radeon_ib_free(rdev, &ib);
3710 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3711 return r;
3712 }
3713 r = radeon_fence_wait(ib.fence, false);
3714 if (r) {
3715 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3716 return r;
3717 }
3718 for (i = 0; i < rdev->usec_timeout; i++) {
3719 tmp = readl(ptr);
3720 if (tmp == 0xDEADBEEF)
3721 break;
3722 DRM_UDELAY(1);
3723 }
3724 if (i < rdev->usec_timeout) {
3725 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3726 } else {
3727 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3728 r = -EINVAL;
3729 }
3730 radeon_ib_free(rdev, &ib);
3731 return r;
3732}
3733
Alex Deuchercc066712013-04-09 12:59:51 -04003734
3735static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3736{
3737 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3738 RREG32(GRBM_STATUS));
3739 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3740 RREG32(GRBM_STATUS2));
3741 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3742 RREG32(GRBM_STATUS_SE0));
3743 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3744 RREG32(GRBM_STATUS_SE1));
3745 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3746 RREG32(GRBM_STATUS_SE2));
3747 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3748 RREG32(GRBM_STATUS_SE3));
3749 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3750 RREG32(SRBM_STATUS));
3751 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3752 RREG32(SRBM_STATUS2));
3753 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3754 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3755 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3756 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
Alex Deucher963e81f2013-06-26 17:37:11 -04003757 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3758 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3759 RREG32(CP_STALLED_STAT1));
3760 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3761 RREG32(CP_STALLED_STAT2));
3762 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3763 RREG32(CP_STALLED_STAT3));
3764 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3765 RREG32(CP_CPF_BUSY_STAT));
3766 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3767 RREG32(CP_CPF_STALLED_STAT1));
3768 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3769 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3770 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3771 RREG32(CP_CPC_STALLED_STAT1));
3772 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
Alex Deuchercc066712013-04-09 12:59:51 -04003773}
3774
Alex Deucher6f2043c2013-04-09 12:43:41 -04003775/**
Alex Deuchercc066712013-04-09 12:59:51 -04003776 * cik_gpu_check_soft_reset - check which blocks are busy
3777 *
3778 * @rdev: radeon_device pointer
3779 *
3780 * Check which blocks are busy and return the relevant reset
3781 * mask to be used by cik_gpu_soft_reset().
3782 * Returns a mask of the blocks to be reset.
3783 */
3784static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3785{
3786 u32 reset_mask = 0;
3787 u32 tmp;
3788
3789 /* GRBM_STATUS */
3790 tmp = RREG32(GRBM_STATUS);
3791 if (tmp & (PA_BUSY | SC_BUSY |
3792 BCI_BUSY | SX_BUSY |
3793 TA_BUSY | VGT_BUSY |
3794 DB_BUSY | CB_BUSY |
3795 GDS_BUSY | SPI_BUSY |
3796 IA_BUSY | IA_BUSY_NO_DMA))
3797 reset_mask |= RADEON_RESET_GFX;
3798
3799 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3800 reset_mask |= RADEON_RESET_CP;
3801
3802 /* GRBM_STATUS2 */
3803 tmp = RREG32(GRBM_STATUS2);
3804 if (tmp & RLC_BUSY)
3805 reset_mask |= RADEON_RESET_RLC;
3806
3807 /* SDMA0_STATUS_REG */
3808 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3809 if (!(tmp & SDMA_IDLE))
3810 reset_mask |= RADEON_RESET_DMA;
3811
3812 /* SDMA1_STATUS_REG */
3813 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3814 if (!(tmp & SDMA_IDLE))
3815 reset_mask |= RADEON_RESET_DMA1;
3816
3817 /* SRBM_STATUS2 */
3818 tmp = RREG32(SRBM_STATUS2);
3819 if (tmp & SDMA_BUSY)
3820 reset_mask |= RADEON_RESET_DMA;
3821
3822 if (tmp & SDMA1_BUSY)
3823 reset_mask |= RADEON_RESET_DMA1;
3824
3825 /* SRBM_STATUS */
3826 tmp = RREG32(SRBM_STATUS);
3827
3828 if (tmp & IH_BUSY)
3829 reset_mask |= RADEON_RESET_IH;
3830
3831 if (tmp & SEM_BUSY)
3832 reset_mask |= RADEON_RESET_SEM;
3833
3834 if (tmp & GRBM_RQ_PENDING)
3835 reset_mask |= RADEON_RESET_GRBM;
3836
3837 if (tmp & VMC_BUSY)
3838 reset_mask |= RADEON_RESET_VMC;
3839
3840 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3841 MCC_BUSY | MCD_BUSY))
3842 reset_mask |= RADEON_RESET_MC;
3843
3844 if (evergreen_is_display_hung(rdev))
3845 reset_mask |= RADEON_RESET_DISPLAY;
3846
3847	/* Skip MC reset as it's most likely not hung, just busy */
3848 if (reset_mask & RADEON_RESET_MC) {
3849 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3850 reset_mask &= ~RADEON_RESET_MC;
3851 }
3852
3853 return reset_mask;
3854}
3855
3856/**
3857 * cik_gpu_soft_reset - soft reset GPU
3858 *
3859 * @rdev: radeon_device pointer
3860 * @reset_mask: mask of which blocks to reset
3861 *
3862 * Soft reset the blocks specified in @reset_mask.
3863 */
3864static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3865{
3866 struct evergreen_mc_save save;
3867 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3868 u32 tmp;
3869
3870 if (reset_mask == 0)
3871 return;
3872
3873 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3874
3875 cik_print_gpu_status_regs(rdev);
3876 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3877 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3878 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3879 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3880
3881 /* stop the rlc */
3882 cik_rlc_stop(rdev);
3883
3884 /* Disable GFX parsing/prefetching */
3885 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3886
3887 /* Disable MEC parsing/prefetching */
3888 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3889
3890 if (reset_mask & RADEON_RESET_DMA) {
3891 /* sdma0 */
3892 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3893 tmp |= SDMA_HALT;
3894 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3895 }
3896 if (reset_mask & RADEON_RESET_DMA1) {
3897 /* sdma1 */
3898 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3899 tmp |= SDMA_HALT;
3900 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3901 }
3902
3903 evergreen_mc_stop(rdev, &save);
3904 if (evergreen_mc_wait_for_idle(rdev)) {
3905		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3906 }
3907
3908 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3909 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3910
3911 if (reset_mask & RADEON_RESET_CP) {
3912 grbm_soft_reset |= SOFT_RESET_CP;
3913
3914 srbm_soft_reset |= SOFT_RESET_GRBM;
3915 }
3916
3917 if (reset_mask & RADEON_RESET_DMA)
3918 srbm_soft_reset |= SOFT_RESET_SDMA;
3919
3920 if (reset_mask & RADEON_RESET_DMA1)
3921 srbm_soft_reset |= SOFT_RESET_SDMA1;
3922
3923 if (reset_mask & RADEON_RESET_DISPLAY)
3924 srbm_soft_reset |= SOFT_RESET_DC;
3925
3926 if (reset_mask & RADEON_RESET_RLC)
3927 grbm_soft_reset |= SOFT_RESET_RLC;
3928
3929 if (reset_mask & RADEON_RESET_SEM)
3930 srbm_soft_reset |= SOFT_RESET_SEM;
3931
3932 if (reset_mask & RADEON_RESET_IH)
3933 srbm_soft_reset |= SOFT_RESET_IH;
3934
3935 if (reset_mask & RADEON_RESET_GRBM)
3936 srbm_soft_reset |= SOFT_RESET_GRBM;
3937
3938 if (reset_mask & RADEON_RESET_VMC)
3939 srbm_soft_reset |= SOFT_RESET_VMC;
3940
3941 if (!(rdev->flags & RADEON_IS_IGP)) {
3942 if (reset_mask & RADEON_RESET_MC)
3943 srbm_soft_reset |= SOFT_RESET_MC;
3944 }
3945
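	/* reset is applied as a pulse: set the bits, read back to make sure
	 * the write posted, wait 50us, then clear the bits again.
	 */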
3946 if (grbm_soft_reset) {
3947 tmp = RREG32(GRBM_SOFT_RESET);
3948 tmp |= grbm_soft_reset;
3949 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3950 WREG32(GRBM_SOFT_RESET, tmp);
3951 tmp = RREG32(GRBM_SOFT_RESET);
3952
3953 udelay(50);
3954
3955 tmp &= ~grbm_soft_reset;
3956 WREG32(GRBM_SOFT_RESET, tmp);
3957 tmp = RREG32(GRBM_SOFT_RESET);
3958 }
3959
3960 if (srbm_soft_reset) {
3961 tmp = RREG32(SRBM_SOFT_RESET);
3962 tmp |= srbm_soft_reset;
3963 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3964 WREG32(SRBM_SOFT_RESET, tmp);
3965 tmp = RREG32(SRBM_SOFT_RESET);
3966
3967 udelay(50);
3968
3969 tmp &= ~srbm_soft_reset;
3970 WREG32(SRBM_SOFT_RESET, tmp);
3971 tmp = RREG32(SRBM_SOFT_RESET);
3972 }
3973
3974 /* Wait a little for things to settle down */
3975 udelay(50);
3976
3977 evergreen_mc_resume(rdev, &save);
3978 udelay(50);
3979
3980 cik_print_gpu_status_regs(rdev);
3981}
3982
3983/**
3984 * cik_asic_reset - soft reset GPU
3985 *
3986 * @rdev: radeon_device pointer
3987 *
3988 * Look up which blocks are hung and attempt
3989 * to reset them.
3990 * Returns 0 for success.
3991 */
3992int cik_asic_reset(struct radeon_device *rdev)
3993{
3994 u32 reset_mask;
3995
3996 reset_mask = cik_gpu_check_soft_reset(rdev);
3997
3998 if (reset_mask)
3999 r600_set_bios_scratch_engine_hung(rdev, true);
4000
4001 cik_gpu_soft_reset(rdev, reset_mask);
4002
4003 reset_mask = cik_gpu_check_soft_reset(rdev);
4004
4005 if (!reset_mask)
4006 r600_set_bios_scratch_engine_hung(rdev, false);
4007
4008 return 0;
4009}
4010
4011/**
4012 * cik_gfx_is_lockup - check if the 3D engine is locked up
Alex Deucher6f2043c2013-04-09 12:43:41 -04004013 *
4014 * @rdev: radeon_device pointer
4015 * @ring: radeon_ring structure holding ring information
4016 *
4017 * Check if the 3D engine is locked up (CIK).
4018 * Returns true if the engine is locked, false if not.
4019 */
Alex Deuchercc066712013-04-09 12:59:51 -04004020bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
Alex Deucher6f2043c2013-04-09 12:43:41 -04004021{
Alex Deuchercc066712013-04-09 12:59:51 -04004022 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
Alex Deucher6f2043c2013-04-09 12:43:41 -04004023
Alex Deuchercc066712013-04-09 12:59:51 -04004024 if (!(reset_mask & (RADEON_RESET_GFX |
4025 RADEON_RESET_COMPUTE |
4026 RADEON_RESET_CP))) {
Alex Deucher6f2043c2013-04-09 12:43:41 -04004027 radeon_ring_lockup_update(ring);
4028 return false;
4029 }
4030 /* force CP activities */
4031 radeon_ring_force_activity(rdev, ring);
4032 return radeon_ring_test_lockup(rdev, ring);
4033}
4034
4035/**
Alex Deucher21a93e12013-04-09 12:47:11 -04004036 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4037 *
4038 * @rdev: radeon_device pointer
4039 * @ring: radeon_ring structure holding ring information
4040 *
4041 * Check if the async DMA engine is locked up (CIK).
4042 * Returns true if the engine appears to be locked up, false if not.
4043 */
4044bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4045{
Alex Deuchercc066712013-04-09 12:59:51 -04004046 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4047 u32 mask;
Alex Deucher21a93e12013-04-09 12:47:11 -04004048
4049 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
Alex Deuchercc066712013-04-09 12:59:51 -04004050 mask = RADEON_RESET_DMA;
Alex Deucher21a93e12013-04-09 12:47:11 -04004051 else
Alex Deuchercc066712013-04-09 12:59:51 -04004052 mask = RADEON_RESET_DMA1;
4053
4054 if (!(reset_mask & mask)) {
Alex Deucher21a93e12013-04-09 12:47:11 -04004055 radeon_ring_lockup_update(ring);
4056 return false;
4057 }
4058 /* force ring activities */
4059 radeon_ring_force_activity(rdev, ring);
4060 return radeon_ring_test_lockup(rdev, ring);
4061}
4062
Alex Deucher1c491652013-04-09 12:45:26 -04004063/* MC */
4064/**
4065 * cik_mc_program - program the GPU memory controller
4066 *
4067 * @rdev: radeon_device pointer
4068 *
4069 * Set the location of vram, gart, and AGP in the GPU's
4070 * physical address space (CIK).
4071 */
4072static void cik_mc_program(struct radeon_device *rdev)
4073{
4074 struct evergreen_mc_save save;
4075 u32 tmp;
4076 int i, j;
4077
4078 /* Initialize HDP */
4079 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4080 WREG32((0x2c14 + j), 0x00000000);
4081 WREG32((0x2c18 + j), 0x00000000);
4082 WREG32((0x2c1c + j), 0x00000000);
4083 WREG32((0x2c20 + j), 0x00000000);
4084 WREG32((0x2c24 + j), 0x00000000);
4085 }
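	/* the loop above zeroes 32 blocks of five HDP registers
	 * (0x2c14-0x2c24), each block 0x18 bytes apart.
	 */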
4086 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4087
4088 evergreen_mc_stop(rdev, &save);
4089 if (radeon_mc_wait_for_idle(rdev)) {
4090 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4091		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4092 /* Lockout access through VGA aperture*/
4093 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4094 /* Update configuration */
4095 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4096 rdev->mc.vram_start >> 12);
4097 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4098 rdev->mc.vram_end >> 12);
4099 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4100 rdev->vram_scratch.gpu_addr >> 12);
4101 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4102 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4103 WREG32(MC_VM_FB_LOCATION, tmp);
4104 /* XXX double check these! */
4105 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4106 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4107 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4108 WREG32(MC_VM_AGP_BASE, 0);
4109 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4110 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4111 if (radeon_mc_wait_for_idle(rdev)) {
4112		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4113 }
4114 evergreen_mc_resume(rdev, &save);
4115	/* we need to own VRAM, so turn off the VGA renderer here
4116	 * to stop it from overwriting our objects */
4117 rv515_vga_render_disable(rdev);
4118}
4119
4120/**
4121 * cik_mc_init - initialize the memory controller driver params
4122 *
4123 * @rdev: radeon_device pointer
4124 *
4125 * Look up the amount of vram, vram width, and decide how to place
4126 * vram and gart within the GPU's physical address space (CIK).
4127 * Returns 0 for success.
4128 */
4129static int cik_mc_init(struct radeon_device *rdev)
4130{
4131 u32 tmp;
4132 int chansize, numchan;
4133
4134	/* Get VRAM information */
4135 rdev->mc.vram_is_ddr = true;
4136 tmp = RREG32(MC_ARB_RAMCFG);
4137 if (tmp & CHANSIZE_MASK) {
4138 chansize = 64;
4139 } else {
4140 chansize = 32;
4141 }
4142 tmp = RREG32(MC_SHARED_CHMAP);
4143 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4144 case 0:
4145 default:
4146 numchan = 1;
4147 break;
4148 case 1:
4149 numchan = 2;
4150 break;
4151 case 2:
4152 numchan = 4;
4153 break;
4154 case 3:
4155 numchan = 8;
4156 break;
4157 case 4:
4158 numchan = 3;
4159 break;
4160 case 5:
4161 numchan = 6;
4162 break;
4163 case 6:
4164 numchan = 10;
4165 break;
4166 case 7:
4167 numchan = 12;
4168 break;
4169 case 8:
4170 numchan = 16;
4171 break;
4172 }
4173 rdev->mc.vram_width = numchan * chansize;
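	/* e.g. 4 channels of 64-bit chips gives an effective 256-bit bus */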
4174 /* Could aper size report 0 ? */
4175 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4176 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4177	/* size in MB, reported the same way as on SI */
4178 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4179 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4180 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4181 si_vram_gtt_location(rdev, &rdev->mc);
4182 radeon_update_bandwidth_info(rdev);
4183
4184 return 0;
4185}
4186
4187/*
4188 * GART
4189 * VMID 0 is the physical GPU addresses as used by the kernel.
4190 * VMIDs 1-15 are used for userspace clients and are handled
4191 * by the radeon vm/hsa code.
4192 */
4193/**
4194 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4195 *
4196 * @rdev: radeon_device pointer
4197 *
4198 * Flush the TLB for the VMID 0 page table (CIK).
4199 */
4200void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4201{
4202 /* flush hdp cache */
4203 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4204
4205 /* bits 0-15 are the VM contexts0-15 */
4206 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4207}
4208
4209/**
4210 * cik_pcie_gart_enable - gart enable
4211 *
4212 * @rdev: radeon_device pointer
4213 *
4214 * This sets up the TLBs, programs the page tables for VMID0,
4215 * sets up the hw for VMIDs 1-15 which are allocated on
4216 * demand, and sets up the global locations for the LDS, GDS,
4217 * and GPUVM for FSA64 clients (CIK).
4218 * Returns 0 for success, errors for failure.
4219 */
4220static int cik_pcie_gart_enable(struct radeon_device *rdev)
4221{
4222 int r, i;
4223
4224 if (rdev->gart.robj == NULL) {
4225 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4226 return -EINVAL;
4227 }
4228 r = radeon_gart_table_vram_pin(rdev);
4229 if (r)
4230 return r;
4231 radeon_gart_restore(rdev);
4232 /* Setup TLB control */
4233 WREG32(MC_VM_MX_L1_TLB_CNTL,
4234 (0xA << 7) |
4235 ENABLE_L1_TLB |
4236 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4237 ENABLE_ADVANCED_DRIVER_MODEL |
4238 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4239 /* Setup L2 cache */
4240 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4241 ENABLE_L2_FRAGMENT_PROCESSING |
4242 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4243 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4244 EFFECTIVE_L2_QUEUE_SIZE(7) |
4245 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4246 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4247 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4248 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4249 /* setup context0 */
4250 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4251 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4252 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4253 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4254 (u32)(rdev->dummy_page.addr >> 12));
4255 WREG32(VM_CONTEXT0_CNTL2, 0);
4256 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4257 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4258
4259 WREG32(0x15D4, 0);
4260 WREG32(0x15D8, 0);
4261 WREG32(0x15DC, 0);
4262
4263 /* empty context1-15 */
4264 /* FIXME start with 4G, once using 2 level pt switch to full
4265 * vm size space
4266 */
4267 /* set vm size, must be a multiple of 4 */
4268 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4269 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4270 for (i = 1; i < 16; i++) {
4271 if (i < 8)
4272 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4273 rdev->gart.table_addr >> 12);
4274 else
4275 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4276 rdev->gart.table_addr >> 12);
4277 }
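	/* contexts 1-7 and 8-15 live in two separate register banks; all of
	 * them initially point at the VMID0 GART table and are re-pointed at
	 * a per-VM page directory when that VM is flushed in.
	 */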
4278
4279 /* enable context1-15 */
4280 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4281 (u32)(rdev->dummy_page.addr >> 12));
Alex Deuchera00024b2012-09-18 16:06:01 -04004282 WREG32(VM_CONTEXT1_CNTL2, 4);
Alex Deucher1c491652013-04-09 12:45:26 -04004283 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
Alex Deuchera00024b2012-09-18 16:06:01 -04004284 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4285 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4286 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4287 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4288 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4289 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4290 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4291 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4292 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4293 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4294 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4295 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
Alex Deucher1c491652013-04-09 12:45:26 -04004296
4297 /* TC cache setup ??? */
4298 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4299 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4300 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4301
4302 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4303 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4304 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4305 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4306 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4307
4308 WREG32(TC_CFG_L1_VOLATILE, 0);
4309 WREG32(TC_CFG_L2_VOLATILE, 0);
4310
4311 if (rdev->family == CHIP_KAVERI) {
4312 u32 tmp = RREG32(CHUB_CONTROL);
4313 tmp &= ~BYPASS_VM;
4314 WREG32(CHUB_CONTROL, tmp);
4315 }
4316
4317 /* XXX SH_MEM regs */
4318 /* where to put LDS, scratch, GPUVM in FSA64 space */
Alex Deucherf61d5b462013-08-06 12:40:16 -04004319 mutex_lock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04004320 for (i = 0; i < 16; i++) {
Alex Deucherb556b122013-01-29 10:44:22 -05004321 cik_srbm_select(rdev, 0, 0, 0, i);
Alex Deucher21a93e12013-04-09 12:47:11 -04004322 /* CP and shaders */
Alex Deucher1c491652013-04-09 12:45:26 -04004323 WREG32(SH_MEM_CONFIG, 0);
4324 WREG32(SH_MEM_APE1_BASE, 1);
4325 WREG32(SH_MEM_APE1_LIMIT, 0);
4326 WREG32(SH_MEM_BASES, 0);
Alex Deucher21a93e12013-04-09 12:47:11 -04004327 /* SDMA GFX */
4328 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4329 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4330 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4331 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4332 /* XXX SDMA RLC - todo */
Alex Deucher1c491652013-04-09 12:45:26 -04004333 }
Alex Deucherb556b122013-01-29 10:44:22 -05004334 cik_srbm_select(rdev, 0, 0, 0, 0);
Alex Deucherf61d5b462013-08-06 12:40:16 -04004335 mutex_unlock(&rdev->srbm_mutex);
Alex Deucher1c491652013-04-09 12:45:26 -04004336
4337 cik_pcie_gart_tlb_flush(rdev);
4338 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4339 (unsigned)(rdev->mc.gtt_size >> 20),
4340 (unsigned long long)rdev->gart.table_addr);
4341 rdev->gart.ready = true;
4342 return 0;
4343}
4344
4345/**
4346 * cik_pcie_gart_disable - gart disable
4347 *
4348 * @rdev: radeon_device pointer
4349 *
4350 * This disables all VM page tables (CIK).
4351 */
4352static void cik_pcie_gart_disable(struct radeon_device *rdev)
4353{
4354 /* Disable all tables */
4355 WREG32(VM_CONTEXT0_CNTL, 0);
4356 WREG32(VM_CONTEXT1_CNTL, 0);
4357 /* Setup TLB control */
4358 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4359 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4360 /* Setup L2 cache */
4361 WREG32(VM_L2_CNTL,
4362 ENABLE_L2_FRAGMENT_PROCESSING |
4363 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4364 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4365 EFFECTIVE_L2_QUEUE_SIZE(7) |
4366 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4367 WREG32(VM_L2_CNTL2, 0);
4368 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4369 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4370 radeon_gart_table_vram_unpin(rdev);
4371}
4372
4373/**
4374 * cik_pcie_gart_fini - vm fini callback
4375 *
4376 * @rdev: radeon_device pointer
4377 *
4378 * Tears down the driver GART/VM setup (CIK).
4379 */
4380static void cik_pcie_gart_fini(struct radeon_device *rdev)
4381{
4382 cik_pcie_gart_disable(rdev);
4383 radeon_gart_table_vram_free(rdev);
4384 radeon_gart_fini(rdev);
4385}
4386
4387/* vm parser */
4388/**
4389 * cik_ib_parse - vm ib_parse callback
4390 *
4391 * @rdev: radeon_device pointer
4392 * @ib: indirect buffer pointer
4393 *
4394 * CIK uses hw IB checking so this is a nop (CIK).
4395 */
4396int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4397{
4398 return 0;
4399}
4400
4401/*
4402 * vm
4403 * VMID 0 is the physical GPU addresses as used by the kernel.
4404 * VMIDs 1-15 are used for userspace clients and are handled
4405 * by the radeon vm/hsa code.
4406 */
4407/**
4408 * cik_vm_init - cik vm init callback
4409 *
4410 * @rdev: radeon_device pointer
4411 *
4412 * Inits cik specific vm parameters (number of VMs, base of vram for
4413 * VMIDs 1-15) (CIK).
4414 * Returns 0 for success.
4415 */
4416int cik_vm_init(struct radeon_device *rdev)
4417{
4418 /* number of VMs */
4419 rdev->vm_manager.nvm = 16;
4420 /* base offset of vram pages */
4421 if (rdev->flags & RADEON_IS_IGP) {
4422 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4423 tmp <<= 22;
4424 rdev->vm_manager.vram_base_offset = tmp;
4425 } else
4426 rdev->vm_manager.vram_base_offset = 0;
4427
4428 return 0;
4429}
4430
4431/**
4432 * cik_vm_fini - cik vm fini callback
4433 *
4434 * @rdev: radeon_device pointer
4435 *
4436 * Tear down any asic specific VM setup (CIK).
4437 */
4438void cik_vm_fini(struct radeon_device *rdev)
4439{
4440}
4441
Alex Deucherf96ab482012-08-31 10:37:47 -04004442/**
Alex Deucher3ec7d112013-06-14 10:42:22 -04004443 * cik_vm_decode_fault - print human readable fault info
4444 *
4445 * @rdev: radeon_device pointer
4446 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4447 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4448 *
4449 * Print human readable fault information (CIK).
4450 */
4451static void cik_vm_decode_fault(struct radeon_device *rdev,
4452 u32 status, u32 addr, u32 mc_client)
4453{
4454 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4455 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4456 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4457 char *block = (char *)&mc_client;
4458
4459 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4460 protections, vmid, addr,
4461 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4462 block, mc_id);
4463}
4464
4465/**
Alex Deucherf96ab482012-08-31 10:37:47 -04004466 * cik_vm_flush - cik vm flush using the CP
4467 *
4468 * @rdev: radeon_device pointer
4469 *
4470 * Update the page table base and flush the VM TLB
4471 * using the CP (CIK).
4472 */
4473void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4474{
4475 struct radeon_ring *ring = &rdev->ring[ridx];
4476
4477 if (vm == NULL)
4478 return;
4479
4480 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4481 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4482 WRITE_DATA_DST_SEL(0)));
4483 if (vm->id < 8) {
4484 radeon_ring_write(ring,
4485 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4486 } else {
4487 radeon_ring_write(ring,
4488 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4489 }
4490 radeon_ring_write(ring, 0);
4491 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4492
4493 /* update SH_MEM_* regs */
4494 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4495 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4496 WRITE_DATA_DST_SEL(0)));
4497 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4498 radeon_ring_write(ring, 0);
4499 radeon_ring_write(ring, VMID(vm->id));
4500
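	/* with SRBM_GFX_CNTL set to VMID(vm->id) above, the SH_MEM_* writes
	 * below land in that VM's register bank; VMID(0) is restored once
	 * they are done.
	 */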
4501 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4502 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4503 WRITE_DATA_DST_SEL(0)));
4504 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4505 radeon_ring_write(ring, 0);
4506
4507 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4508 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4509 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4510 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4511
4512 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4513 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4514 WRITE_DATA_DST_SEL(0)));
4515 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4516 radeon_ring_write(ring, 0);
4517 radeon_ring_write(ring, VMID(0));
4518
4519 /* HDP flush */
4520 /* We should be using the WAIT_REG_MEM packet here like in
4521 * cik_fence_ring_emit(), but it causes the CP to hang in this
4522 * context...
4523 */
4524 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4525 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4526 WRITE_DATA_DST_SEL(0)));
4527 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4528 radeon_ring_write(ring, 0);
4529 radeon_ring_write(ring, 0);
4530
4531 /* bits 0-15 are the VM contexts0-15 */
4532 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4533 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4534 WRITE_DATA_DST_SEL(0)));
4535 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4536 radeon_ring_write(ring, 0);
4537 radeon_ring_write(ring, 1 << vm->id);
4538
Alex Deucherb07fdd32013-04-11 09:36:17 -04004539 /* compute doesn't have PFP */
4540 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4541 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4542 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4543 radeon_ring_write(ring, 0x0);
4544 }
Alex Deucherf96ab482012-08-31 10:37:47 -04004545}
4546
Alex Deucher605de6b2012-10-22 13:04:03 -04004547/**
Alex Deucherd0e092d2012-08-31 11:00:53 -04004548 * cik_vm_set_page - update the page tables using CP or sDMA
4549 *
4550 * @rdev: radeon_device pointer
4551 * @ib: indirect buffer to fill with commands
4552 * @pe: addr of the page entry
4553 * @addr: dst addr to write into pe
4554 * @count: number of page entries to update
4555 * @incr: increase next addr by incr bytes
4556 * @flags: access flags
4557 *
4558 * Update the page tables using CP or sDMA (CIK).
4559 */
4560void cik_vm_set_page(struct radeon_device *rdev,
4561 struct radeon_ib *ib,
4562 uint64_t pe,
4563 uint64_t addr, unsigned count,
4564 uint32_t incr, uint32_t flags)
4565{
4566 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4567 uint64_t value;
4568 unsigned ndw;
4569
4570 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4571 /* CP */
4572 while (count) {
4573 ndw = 2 + count * 2;
4574 if (ndw > 0x3FFE)
4575 ndw = 0x3FFE;
4576
4577 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4578 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4579 WRITE_DATA_DST_SEL(1));
4580 ib->ptr[ib->length_dw++] = pe;
4581 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4582 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4583 if (flags & RADEON_VM_PAGE_SYSTEM) {
4584 value = radeon_vm_map_gart(rdev, addr);
4585 value &= 0xFFFFFFFFFFFFF000ULL;
4586 } else if (flags & RADEON_VM_PAGE_VALID) {
4587 value = addr;
4588 } else {
4589 value = 0;
4590 }
4591 addr += incr;
4592 value |= r600_flags;
4593 ib->ptr[ib->length_dw++] = value;
4594 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4595 }
4596 }
4597 } else {
4598 /* DMA */
4599 if (flags & RADEON_VM_PAGE_SYSTEM) {
4600 while (count) {
4601 ndw = count * 2;
4602 if (ndw > 0xFFFFE)
4603 ndw = 0xFFFFE;
4604
4605 /* for non-physically contiguous pages (system) */
4606 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4607 ib->ptr[ib->length_dw++] = pe;
4608 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4609 ib->ptr[ib->length_dw++] = ndw;
4610 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4611 if (flags & RADEON_VM_PAGE_SYSTEM) {
4612 value = radeon_vm_map_gart(rdev, addr);
4613 value &= 0xFFFFFFFFFFFFF000ULL;
4614 } else if (flags & RADEON_VM_PAGE_VALID) {
4615 value = addr;
4616 } else {
4617 value = 0;
4618 }
4619 addr += incr;
4620 value |= r600_flags;
4621 ib->ptr[ib->length_dw++] = value;
4622 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4623 }
4624 }
4625 } else {
4626 while (count) {
4627 ndw = count;
4628 if (ndw > 0x7FFFF)
4629 ndw = 0x7FFFF;
4630
4631 if (flags & RADEON_VM_PAGE_VALID)
4632 value = addr;
4633 else
4634 value = 0;
4635 /* for physically contiguous pages (vram) */
4636 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4637 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4638 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4639 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4640 ib->ptr[ib->length_dw++] = 0;
4641 ib->ptr[ib->length_dw++] = value; /* value */
4642 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4643 ib->ptr[ib->length_dw++] = incr; /* increment size */
4644 ib->ptr[ib->length_dw++] = 0;
4645 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4646 pe += ndw * 8;
4647 addr += ndw * incr;
4648 count -= ndw;
4649 }
4650 }
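		/* pad the IB with NOPs to a multiple of 8 DWs, the
		 * alignment the SDMA engine expects for indirect buffers.
		 */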
4651 while (ib->length_dw & 0x7)
4652 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4653 }
4654}
4655
4656/**
Alex Deucher605de6b2012-10-22 13:04:03 -04004657 * cik_dma_vm_flush - cik vm flush using sDMA
4658 *
4659 * @rdev: radeon_device pointer
4660 *
4661 * Update the page table base and flush the VM TLB
4662 * using sDMA (CIK).
4663 */
4664void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4665{
4666 struct radeon_ring *ring = &rdev->ring[ridx];
4667 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4668 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4669 u32 ref_and_mask;
4670
4671 if (vm == NULL)
4672 return;
4673
4674 if (ridx == R600_RING_TYPE_DMA_INDEX)
4675 ref_and_mask = SDMA0;
4676 else
4677 ref_and_mask = SDMA1;
4678
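	/* each SRBM_WRITE packet below carries a register offset followed by
	 * the value to write; the 0xf000 in the header appears to be the
	 * byte-enable field (all four bytes enabled).
	 */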
4679 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4680 if (vm->id < 8) {
4681 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4682 } else {
4683 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4684 }
4685 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4686
4687 /* update SH_MEM_* regs */
4688 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4689 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4690 radeon_ring_write(ring, VMID(vm->id));
4691
4692 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4693 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4694 radeon_ring_write(ring, 0);
4695
4696 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4697 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4698 radeon_ring_write(ring, 0);
4699
4700 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4701 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4702 radeon_ring_write(ring, 1);
4703
4704 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4705 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4706 radeon_ring_write(ring, 0);
4707
4708 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4709 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4710 radeon_ring_write(ring, VMID(0));
4711
4712 /* flush HDP */
4713 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4714 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4715 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4716 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4717 radeon_ring_write(ring, ref_and_mask); /* MASK */
4718 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4719
4720 /* flush TLB */
4721 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4722 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4723 radeon_ring_write(ring, 1 << vm->id);
4724}
4725
Alex Deucherf6796ca2012-11-09 10:44:08 -05004726/*
4727 * RLC
4728 * The RLC is a multi-purpose microengine that handles a
4729 * variety of functions, the most important of which is
4730 * the interrupt controller.
4731 */
Alex Deucher866d83d2013-04-15 17:13:29 -04004732static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4733 bool enable)
Alex Deucherf6796ca2012-11-09 10:44:08 -05004734{
Alex Deucher866d83d2013-04-15 17:13:29 -04004735 u32 tmp = RREG32(CP_INT_CNTL_RING0);
Alex Deucherf6796ca2012-11-09 10:44:08 -05004736
Alex Deucher866d83d2013-04-15 17:13:29 -04004737 if (enable)
4738 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4739 else
4740 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucherf6796ca2012-11-09 10:44:08 -05004741 WREG32(CP_INT_CNTL_RING0, tmp);
Alex Deucher866d83d2013-04-15 17:13:29 -04004742}
Alex Deucherf6796ca2012-11-09 10:44:08 -05004743
Alex Deucher866d83d2013-04-15 17:13:29 -04004744static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4745{
4746 u32 tmp;
Alex Deucherf6796ca2012-11-09 10:44:08 -05004747
Alex Deucher866d83d2013-04-15 17:13:29 -04004748 tmp = RREG32(RLC_LB_CNTL);
4749 if (enable)
4750 tmp |= LOAD_BALANCE_ENABLE;
4751 else
4752 tmp &= ~LOAD_BALANCE_ENABLE;
4753 WREG32(RLC_LB_CNTL, tmp);
4754}
Alex Deucherf6796ca2012-11-09 10:44:08 -05004755
Alex Deucher866d83d2013-04-15 17:13:29 -04004756static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4757{
4758 u32 i, j, k;
4759 u32 mask;
Alex Deucherf6796ca2012-11-09 10:44:08 -05004760
4761 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4762 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4763 cik_select_se_sh(rdev, i, j);
4764 for (k = 0; k < rdev->usec_timeout; k++) {
4765 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4766 break;
4767 udelay(1);
4768 }
4769 }
4770 }
4771 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4772
4773 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4774 for (k = 0; k < rdev->usec_timeout; k++) {
4775 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4776 break;
4777 udelay(1);
4778 }
4779}
4780
4781/**
Alex Deucher866d83d2013-04-15 17:13:29 -04004782 * cik_rlc_stop - stop the RLC ME
4783 *
4784 * @rdev: radeon_device pointer
4785 *
4786 * Halt the RLC ME (MicroEngine) (CIK).
4787 */
4788static void cik_rlc_stop(struct radeon_device *rdev)
4789{
4790 u32 tmp;
4791
4792 cik_enable_gui_idle_interrupt(rdev, false);
4793
4794 RREG32(CB_CGTT_SCLK_CTRL);
4795 RREG32(CB_CGTT_SCLK_CTRL);
4796 RREG32(CB_CGTT_SCLK_CTRL);
4797 RREG32(CB_CGTT_SCLK_CTRL);
4798
4799 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4800 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4801
4802 WREG32(RLC_CNTL, 0);
4803
4804 cik_wait_for_rlc_serdes(rdev);
4805}
4806
4807/**
Alex Deucherf6796ca2012-11-09 10:44:08 -05004808 * cik_rlc_start - start the RLC ME
4809 *
4810 * @rdev: radeon_device pointer
4811 *
4812 * Unhalt the RLC ME (MicroEngine) (CIK).
4813 */
4814static void cik_rlc_start(struct radeon_device *rdev)
4815{
Alex Deucherf6796ca2012-11-09 10:44:08 -05004816 WREG32(RLC_CNTL, RLC_ENABLE);
4817
Alex Deucher866d83d2013-04-15 17:13:29 -04004818 cik_enable_gui_idle_interrupt(rdev, true);
Alex Deucherf6796ca2012-11-09 10:44:08 -05004819
4820 udelay(50);
4821}
4822
4823/**
4824 * cik_rlc_resume - setup the RLC hw
4825 *
4826 * @rdev: radeon_device pointer
4827 *
4828 * Initialize the RLC registers, load the ucode,
4829 * and start the RLC (CIK).
4830 * Returns 0 for success, -EINVAL if the ucode is not available.
4831 */
4832static int cik_rlc_resume(struct radeon_device *rdev)
4833{
4834 u32 i, size;
4835 u32 clear_state_info[3];
4836 const __be32 *fw_data;
4837
4838 if (!rdev->rlc_fw)
4839 return -EINVAL;
4840
4841 switch (rdev->family) {
4842 case CHIP_BONAIRE:
4843 default:
4844 size = BONAIRE_RLC_UCODE_SIZE;
4845 break;
4846 case CHIP_KAVERI:
4847 size = KV_RLC_UCODE_SIZE;
4848 break;
4849 case CHIP_KABINI:
4850 size = KB_RLC_UCODE_SIZE;
4851 break;
4852 }
4853
4854 cik_rlc_stop(rdev);
4855
Alex Deucher866d83d2013-04-15 17:13:29 -04004856 si_rlc_reset(rdev);
Alex Deucherf6796ca2012-11-09 10:44:08 -05004857
4858 WREG32(RLC_LB_CNTR_INIT, 0);
4859 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4860
4861 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4862 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4863 WREG32(RLC_LB_PARAMS, 0x00600408);
4864 WREG32(RLC_LB_CNTL, 0x80000004);
4865
4866 WREG32(RLC_MC_CNTL, 0);
4867 WREG32(RLC_UCODE_CNTL, 0);
4868
4869 fw_data = (const __be32 *)rdev->rlc_fw->data;
4870 WREG32(RLC_GPM_UCODE_ADDR, 0);
4871 for (i = 0; i < size; i++)
4872 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4873 WREG32(RLC_GPM_UCODE_ADDR, 0);
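	/* the ucode is streamed one DW at a time through the ADDR/DATA pair;
	 * ADDR is reset to 0 both before and after the transfer.
	 */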
4874
Alex Deucher866d83d2013-04-15 17:13:29 -04004875 /* XXX - find out what chips support lbpw */
4876 cik_enable_lbpw(rdev, false);
4877
Alex Deucherf6796ca2012-11-09 10:44:08 -05004878 /* XXX */
4879 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4880 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4881 clear_state_info[2] = 0;//cik_default_size;
4882 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4883 for (i = 0; i < 3; i++)
4884 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4885 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4886
4887 cik_rlc_start(rdev);
4888
4889 return 0;
4890}
Alex Deuchera59781b2012-11-09 10:45:57 -05004891
4892/*
4893 * Interrupts
4894 * Starting with r6xx, interrupts are handled via a ring buffer.
4895 * Ring buffers are areas of GPU accessible memory that the GPU
4896 * writes interrupt vectors into and the host reads vectors out of.
4897 * There is a rptr (read pointer) that determines where the
4898 * host is currently reading, and a wptr (write pointer)
4899 * which determines where the GPU has written. When the
4900 * pointers are equal, the ring is idle. When the GPU
4901 * writes vectors to the ring buffer, it increments the
4902 * wptr. When there is an interrupt, the host then starts
4903 * fetching commands and processing them until the pointers are
4904 * equal again at which point it updates the rptr.
4905 */
4906
4907/**
4908 * cik_enable_interrupts - Enable the interrupt ring buffer
4909 *
4910 * @rdev: radeon_device pointer
4911 *
4912 * Enable the interrupt ring buffer (CIK).
4913 */
4914static void cik_enable_interrupts(struct radeon_device *rdev)
4915{
4916 u32 ih_cntl = RREG32(IH_CNTL);
4917 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4918
4919 ih_cntl |= ENABLE_INTR;
4920 ih_rb_cntl |= IH_RB_ENABLE;
4921 WREG32(IH_CNTL, ih_cntl);
4922 WREG32(IH_RB_CNTL, ih_rb_cntl);
4923 rdev->ih.enabled = true;
4924}
4925
4926/**
4927 * cik_disable_interrupts - Disable the interrupt ring buffer
4928 *
4929 * @rdev: radeon_device pointer
4930 *
4931 * Disable the interrupt ring buffer (CIK).
4932 */
4933static void cik_disable_interrupts(struct radeon_device *rdev)
4934{
4935 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4936 u32 ih_cntl = RREG32(IH_CNTL);
4937
4938 ih_rb_cntl &= ~IH_RB_ENABLE;
4939 ih_cntl &= ~ENABLE_INTR;
4940 WREG32(IH_RB_CNTL, ih_rb_cntl);
4941 WREG32(IH_CNTL, ih_cntl);
4942 /* set rptr, wptr to 0 */
4943 WREG32(IH_RB_RPTR, 0);
4944 WREG32(IH_RB_WPTR, 0);
4945 rdev->ih.enabled = false;
4946 rdev->ih.rptr = 0;
4947}
4948
4949/**
4950 * cik_disable_interrupt_state - Disable all interrupt sources
4951 *
4952 * @rdev: radeon_device pointer
4953 *
4954 * Clear all interrupt enable bits used by the driver (CIK).
4955 */
4956static void cik_disable_interrupt_state(struct radeon_device *rdev)
4957{
4958 u32 tmp;
4959
4960 /* gfx ring */
4961 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
Alex Deucher21a93e12013-04-09 12:47:11 -04004962 /* sdma */
4963 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4964 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4965 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4966 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
Alex Deuchera59781b2012-11-09 10:45:57 -05004967 /* compute queues */
4968 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4969 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4970 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4971 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4972 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4973 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4974 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4975 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4976 /* grbm */
4977 WREG32(GRBM_INT_CNTL, 0);
4978 /* vline/vblank, etc. */
4979 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4980 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4981 if (rdev->num_crtc >= 4) {
4982 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4983 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4984 }
4985 if (rdev->num_crtc >= 6) {
4986 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4987 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4988 }
4989
4990 /* dac hotplug */
4991 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4992
4993 /* digital hotplug */
4994 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4995 WREG32(DC_HPD1_INT_CONTROL, tmp);
4996 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4997 WREG32(DC_HPD2_INT_CONTROL, tmp);
4998 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4999 WREG32(DC_HPD3_INT_CONTROL, tmp);
5000 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5001 WREG32(DC_HPD4_INT_CONTROL, tmp);
5002 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5003 WREG32(DC_HPD5_INT_CONTROL, tmp);
5004 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5005 WREG32(DC_HPD6_INT_CONTROL, tmp);
5006
5007}
5008
5009/**
5010 * cik_irq_init - init and enable the interrupt ring
5011 *
5012 * @rdev: radeon_device pointer
5013 *
5014 * Allocate a ring buffer for the interrupt controller,
5015 * enable the RLC, disable interrupts, then set up the IH
5016 * ring buffer and enable it (CIK).
5017 * Called at device load and resume.
5018 * Returns 0 for success, errors for failure.
5019 */
5020static int cik_irq_init(struct radeon_device *rdev)
5021{
5022 int ret = 0;
5023 int rb_bufsz;
5024 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5025
5026 /* allocate ring */
5027 ret = r600_ih_ring_alloc(rdev);
5028 if (ret)
5029 return ret;
5030
5031 /* disable irqs */
5032 cik_disable_interrupts(rdev);
5033
5034 /* init rlc */
5035 ret = cik_rlc_resume(rdev);
5036 if (ret) {
5037 r600_ih_ring_fini(rdev);
5038 return ret;
5039 }
5040
5041 /* setup interrupt control */
5042 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5043 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5044 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5045 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5046 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5047 */
5048 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5049 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5050 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5051 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5052
5053 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5054 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5055
5056 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5057 IH_WPTR_OVERFLOW_CLEAR |
5058 (rb_bufsz << 1));
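
	/* Worked example (illustrative, assuming the default 64 KB IH ring
	 * allocated by r600_ih_ring_init(rdev, 64 * 1024) in cik_init()):
	 * ring_size / 4 = 16384 dwords and drm_order(16384) = 14, so the
	 * value programmed into the ring size field above is 14, i.e.
	 * 2^14 dwords = 64 KB (4096 16-byte IV entries).  The << 1 only
	 * shifts the value into its bit position within IH_RB_CNTL.
	 */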
5059
5060 if (rdev->wb.enabled)
5061 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5062
5063 /* set the writeback address whether it's enabled or not */
5064 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5065 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5066
5067 WREG32(IH_RB_CNTL, ih_rb_cntl);
5068
5069 /* set rptr, wptr to 0 */
5070 WREG32(IH_RB_RPTR, 0);
5071 WREG32(IH_RB_WPTR, 0);
5072
5073 /* Default settings for IH_CNTL (disabled at first) */
5074 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5075 /* RPTR_REARM only works if msi's are enabled */
5076 if (rdev->msi_enabled)
5077 ih_cntl |= RPTR_REARM;
5078 WREG32(IH_CNTL, ih_cntl);
5079
5080 /* force the active interrupt state to all disabled */
5081 cik_disable_interrupt_state(rdev);
5082
5083 pci_set_master(rdev->pdev);
5084
5085 /* enable irqs */
5086 cik_enable_interrupts(rdev);
5087
5088 return ret;
5089}
5090
5091/**
5092 * cik_irq_set - enable/disable interrupt sources
5093 *
5094 * @rdev: radeon_device pointer
5095 *
5096 * Enable interrupt sources on the GPU (vblanks, hpd,
5097 * etc.) (CIK).
5098 * Returns 0 for success, errors for failure.
5099 */
5100int cik_irq_set(struct radeon_device *rdev)
5101{
5102 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5103 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
Alex Deucher2b0781a2013-04-09 14:26:16 -04005104 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5105 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
Alex Deuchera59781b2012-11-09 10:45:57 -05005106 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5107 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5108 u32 grbm_int_cntl = 0;
Alex Deucher21a93e12013-04-09 12:47:11 -04005109 u32 dma_cntl, dma_cntl1;
Alex Deuchera59781b2012-11-09 10:45:57 -05005110
5111 if (!rdev->irq.installed) {
5112 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5113 return -EINVAL;
5114 }
5115 /* don't enable anything if the ih is disabled */
5116 if (!rdev->ih.enabled) {
5117 cik_disable_interrupts(rdev);
5118 /* force the active interrupt state to all disabled */
5119 cik_disable_interrupt_state(rdev);
5120 return 0;
5121 }
5122
5123 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5124 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5125 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5126 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5127 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5128 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5129
Alex Deucher21a93e12013-04-09 12:47:11 -04005130 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5131 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5132
Alex Deucher2b0781a2013-04-09 14:26:16 -04005133 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5134 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5135 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5136 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5137 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5138 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5139 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5140 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5141
Alex Deuchera59781b2012-11-09 10:45:57 -05005142 /* enable CP interrupts on all rings */
5143 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5144 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5145 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5146 }
Alex Deucher2b0781a2013-04-09 14:26:16 -04005147 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5148 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5149 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5150 if (ring->me == 1) {
5151 switch (ring->pipe) {
5152 case 0:
5153 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5154 break;
5155 case 1:
5156 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5157 break;
5158 case 2:
5159 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5160 break;
5161 case 3:
5162 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5163 break;
5164 default:
5165 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5166 break;
5167 }
5168 } else if (ring->me == 2) {
5169 switch (ring->pipe) {
5170 case 0:
5171 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5172 break;
5173 case 1:
5174 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5175 break;
5176 case 2:
5177 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5178 break;
5179 case 3:
5180 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5181 break;
5182 default:
5183 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5184 break;
5185 }
5186 } else {
5187 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5188 }
5189 }
5190 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5191 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5192 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5193 if (ring->me == 1) {
5194 switch (ring->pipe) {
5195 case 0:
5196 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5197 break;
5198 case 1:
5199 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5200 break;
5201 case 2:
5202 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5203 break;
5204 case 3:
5205 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5206 break;
5207 default:
5208 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5209 break;
5210 }
5211 } else if (ring->me == 2) {
5212 switch (ring->pipe) {
5213 case 0:
5214 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5215 break;
5216 case 1:
5217 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5218 break;
5219 case 2:
5220 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5221 break;
5222 case 3:
5223 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5224 break;
5225 default:
5226 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5227 break;
5228 }
5229 } else {
5230 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5231 }
5232 }
Alex Deuchera59781b2012-11-09 10:45:57 -05005233
Alex Deucher21a93e12013-04-09 12:47:11 -04005234 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5235 DRM_DEBUG("cik_irq_set: sw int dma\n");
5236 dma_cntl |= TRAP_ENABLE;
5237 }
5238
5239 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5240 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5241 dma_cntl1 |= TRAP_ENABLE;
5242 }
5243
Alex Deuchera59781b2012-11-09 10:45:57 -05005244 if (rdev->irq.crtc_vblank_int[0] ||
5245 atomic_read(&rdev->irq.pflip[0])) {
5246 DRM_DEBUG("cik_irq_set: vblank 0\n");
5247 crtc1 |= VBLANK_INTERRUPT_MASK;
5248 }
5249 if (rdev->irq.crtc_vblank_int[1] ||
5250 atomic_read(&rdev->irq.pflip[1])) {
5251 DRM_DEBUG("cik_irq_set: vblank 1\n");
5252 crtc2 |= VBLANK_INTERRUPT_MASK;
5253 }
5254 if (rdev->irq.crtc_vblank_int[2] ||
5255 atomic_read(&rdev->irq.pflip[2])) {
5256 DRM_DEBUG("cik_irq_set: vblank 2\n");
5257 crtc3 |= VBLANK_INTERRUPT_MASK;
5258 }
5259 if (rdev->irq.crtc_vblank_int[3] ||
5260 atomic_read(&rdev->irq.pflip[3])) {
5261 DRM_DEBUG("cik_irq_set: vblank 3\n");
5262 crtc4 |= VBLANK_INTERRUPT_MASK;
5263 }
5264 if (rdev->irq.crtc_vblank_int[4] ||
5265 atomic_read(&rdev->irq.pflip[4])) {
5266 DRM_DEBUG("cik_irq_set: vblank 4\n");
5267 crtc5 |= VBLANK_INTERRUPT_MASK;
5268 }
5269 if (rdev->irq.crtc_vblank_int[5] ||
5270 atomic_read(&rdev->irq.pflip[5])) {
5271 DRM_DEBUG("cik_irq_set: vblank 5\n");
5272 crtc6 |= VBLANK_INTERRUPT_MASK;
5273 }
5274 if (rdev->irq.hpd[0]) {
5275 DRM_DEBUG("cik_irq_set: hpd 1\n");
5276 hpd1 |= DC_HPDx_INT_EN;
5277 }
5278 if (rdev->irq.hpd[1]) {
5279 DRM_DEBUG("cik_irq_set: hpd 2\n");
5280 hpd2 |= DC_HPDx_INT_EN;
5281 }
5282 if (rdev->irq.hpd[2]) {
5283 DRM_DEBUG("cik_irq_set: hpd 3\n");
5284 hpd3 |= DC_HPDx_INT_EN;
5285 }
5286 if (rdev->irq.hpd[3]) {
5287 DRM_DEBUG("cik_irq_set: hpd 4\n");
5288 hpd4 |= DC_HPDx_INT_EN;
5289 }
5290 if (rdev->irq.hpd[4]) {
5291 DRM_DEBUG("cik_irq_set: hpd 5\n");
5292 hpd5 |= DC_HPDx_INT_EN;
5293 }
5294 if (rdev->irq.hpd[5]) {
5295 DRM_DEBUG("cik_irq_set: hpd 6\n");
5296 hpd6 |= DC_HPDx_INT_EN;
5297 }
5298
5299 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5300
Alex Deucher21a93e12013-04-09 12:47:11 -04005301 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5302 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5303
Alex Deucher2b0781a2013-04-09 14:26:16 -04005304 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5305 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5306 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5307 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5308 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5309 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5310 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5311 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5312
Alex Deuchera59781b2012-11-09 10:45:57 -05005313 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5314
5315 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5316 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5317 if (rdev->num_crtc >= 4) {
5318 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5319 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5320 }
5321 if (rdev->num_crtc >= 6) {
5322 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5323 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5324 }
5325
5326 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5327 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5328 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5329 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5330 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5331 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5332
5333 return 0;
5334}
5335
5336/**
5337 * cik_irq_ack - ack interrupt sources
5338 *
5339 * @rdev: radeon_device pointer
5340 *
5341 * Ack interrupt sources on the GPU (vblanks, hpd,
5342 * etc.) (CIK). Certain interrupt sources are sw
5343 * generated and do not require an explicit ack.
5344 */
5345static inline void cik_irq_ack(struct radeon_device *rdev)
5346{
5347 u32 tmp;
5348
5349 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5350 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5351 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5352 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5353 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5354 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5355 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5356
5357 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5358 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5359 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5360 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5361 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5362 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5363 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5364 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5365
5366 if (rdev->num_crtc >= 4) {
5367 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5368 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5369 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5370 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5371 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5372 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5373 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5374 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5375 }
5376
5377 if (rdev->num_crtc >= 6) {
5378 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5379 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5380 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5381 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5382 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5383 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5384 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5385 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5386 }
5387
5388 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5389 tmp = RREG32(DC_HPD1_INT_CONTROL);
5390 tmp |= DC_HPDx_INT_ACK;
5391 WREG32(DC_HPD1_INT_CONTROL, tmp);
5392 }
5393 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5394 tmp = RREG32(DC_HPD2_INT_CONTROL);
5395 tmp |= DC_HPDx_INT_ACK;
5396 WREG32(DC_HPD2_INT_CONTROL, tmp);
5397 }
5398 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5399 tmp = RREG32(DC_HPD3_INT_CONTROL);
5400 tmp |= DC_HPDx_INT_ACK;
5401 WREG32(DC_HPD3_INT_CONTROL, tmp);
5402 }
5403 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5404 tmp = RREG32(DC_HPD4_INT_CONTROL);
5405 tmp |= DC_HPDx_INT_ACK;
5406 WREG32(DC_HPD4_INT_CONTROL, tmp);
5407 }
5408 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5409 tmp = RREG32(DC_HPD5_INT_CONTROL);
5410 tmp |= DC_HPDx_INT_ACK;
5411 WREG32(DC_HPD5_INT_CONTROL, tmp);
5412 }
5413 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5414 tmp = RREG32(DC_HPD6_INT_CONTROL);
5415 tmp |= DC_HPDx_INT_ACK;
5416 WREG32(DC_HPD6_INT_CONTROL, tmp);
5417 }
5418}
5419
5420/**
5421 * cik_irq_disable - disable interrupts
5422 *
5423 * @rdev: radeon_device pointer
5424 *
5425 * Disable interrupts on the hw (CIK).
5426 */
5427static void cik_irq_disable(struct radeon_device *rdev)
5428{
5429 cik_disable_interrupts(rdev);
5430 /* Wait and acknowledge irq */
5431 mdelay(1);
5432 cik_irq_ack(rdev);
5433 cik_disable_interrupt_state(rdev);
5434}
5435
5436/**
5437 * cik_irq_suspend - disable interrupts for suspend
5438 *
5439 * @rdev: radeon_device pointer
5440 *
5441 * Disable interrupts and stop the RLC (CIK).
5442 * Used for suspend.
5443 */
5444static void cik_irq_suspend(struct radeon_device *rdev)
5445{
5446 cik_irq_disable(rdev);
5447 cik_rlc_stop(rdev);
5448}
5449
5450/**
5451 * cik_irq_fini - tear down interrupt support
5452 *
5453 * @rdev: radeon_device pointer
5454 *
5455 * Disable interrupts on the hw and free the IH ring
5456 * buffer (CIK).
5457 * Used for driver unload.
5458 */
5459static void cik_irq_fini(struct radeon_device *rdev)
5460{
5461 cik_irq_suspend(rdev);
5462 r600_ih_ring_fini(rdev);
5463}
5464
5465/**
5466 * cik_get_ih_wptr - get the IH ring buffer wptr
5467 *
5468 * @rdev: radeon_device pointer
5469 *
5470 * Get the IH ring buffer wptr from either the register
5471 * or the writeback memory buffer (CIK). Also check for
5472 * ring buffer overflow and deal with it.
5473 * Used by cik_irq_process().
5474 * Returns the value of the wptr.
5475 */
5476static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5477{
5478 u32 wptr, tmp;
5479
5480 if (rdev->wb.enabled)
5481 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5482 else
5483 wptr = RREG32(IH_RB_WPTR);
5484
5485 if (wptr & RB_OVERFLOW) {
5486 /* When a ring buffer overflow happens, start parsing interrupts
5487 * from the last not-overwritten vector (wptr + 16). Hopefully
5488 * this should allow us to catch up.
5489 */
5490 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5491 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5492 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5493 tmp = RREG32(IH_RB_CNTL);
5494 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5495 WREG32(IH_RB_CNTL, tmp);
5496 }
5497 return (wptr & rdev->ih.ptr_mask);
5498}
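
/* Illustrative note (assuming the default 64 KB IH ring set up in cik_init()
 * via r600_ih_ring_init(rdev, 64 * 1024)): ptr_mask is ring_size - 1 = 0xffff,
 * so rptr and wptr are byte offsets that wrap at 64 KB, and cik_irq_process()
 * below advances rptr by 16 bytes per 128-bit IV ring entry.
 */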
5499
5500/* CIK IV Ring
5501 * Each IV ring entry is 128 bits:
5502 * [7:0] - interrupt source id
5503 * [31:8] - reserved
5504 * [59:32] - interrupt source data
5505 * [63:60] - reserved
Alex Deucher21a93e12013-04-09 12:47:11 -04005506 * [71:64] - RINGID
5507 * CP:
5508 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
Alex Deuchera59781b2012-11-09 10:45:57 -05005509 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5510 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5511 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5512 * PIPE_ID - ME0 0=3D
5513 * - ME1&2 compute dispatcher (4 pipes each)
Alex Deucher21a93e12013-04-09 12:47:11 -04005514 * SDMA:
5515 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5516 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5517 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
Alex Deuchera59781b2012-11-09 10:45:57 -05005518 * [79:72] - VMID
5519 * [95:80] - PASID
5520 * [127:96] - reserved
5521 */
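
/* Illustrative sketch (not part of the driver): how the IV ring entry layout
 * documented above maps onto the dwords that cik_irq_process() reads.  Field
 * positions are taken from the comment; the helper name and the vm_id decode
 * are assumptions for illustration only.
 */
static inline void cik_iv_entry_decode(const volatile u32 *ring, u32 ring_index,
				       u32 *src_id, u32 *src_data,
				       u32 *ring_id, u32 *vm_id)
{
	*src_id   = le32_to_cpu(ring[ring_index + 0]) & 0xff;        /* [7:0] */
	*src_data = le32_to_cpu(ring[ring_index + 1]) & 0xfffffff;   /* [59:32] */
	*ring_id  = le32_to_cpu(ring[ring_index + 2]) & 0xff;        /* [71:64] */
	*vm_id    = (le32_to_cpu(ring[ring_index + 2]) >> 8) & 0xff; /* [79:72] */
}
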
5522/**
5523 * cik_irq_process - interrupt handler
5524 *
5525 * @rdev: radeon_device pointer
5526 *
5527 * Interrupt handler (CIK). Walk the IH ring,
5528 * ack interrupts and schedule work to handle
5529 * interrupt events.
5530 * Returns irq process return code.
5531 */
5532int cik_irq_process(struct radeon_device *rdev)
5533{
Alex Deucher2b0781a2013-04-09 14:26:16 -04005534 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5535 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
Alex Deuchera59781b2012-11-09 10:45:57 -05005536 u32 wptr;
5537 u32 rptr;
5538 u32 src_id, src_data, ring_id;
5539 u8 me_id, pipe_id, queue_id;
5540 u32 ring_index;
5541 bool queue_hotplug = false;
5542 bool queue_reset = false;
Alex Deucher3ec7d112013-06-14 10:42:22 -04005543 u32 addr, status, mc_client;
Alex Deuchera59781b2012-11-09 10:45:57 -05005544
5545 if (!rdev->ih.enabled || rdev->shutdown)
5546 return IRQ_NONE;
5547
5548 wptr = cik_get_ih_wptr(rdev);
5549
5550restart_ih:
5551 /* is somebody else already processing irqs? */
5552 if (atomic_xchg(&rdev->ih.lock, 1))
5553 return IRQ_NONE;
5554
5555 rptr = rdev->ih.rptr;
5556 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5557
5558 /* Order reading of wptr vs. reading of IH ring data */
5559 rmb();
5560
5561 /* display interrupts */
5562 cik_irq_ack(rdev);
5563
5564 while (rptr != wptr) {
5565 /* wptr/rptr are in bytes! */
5566 ring_index = rptr / 4;
5567 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5568 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5569 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
Alex Deuchera59781b2012-11-09 10:45:57 -05005570
5571 switch (src_id) {
5572 case 1: /* D1 vblank/vline */
5573 switch (src_data) {
5574 case 0: /* D1 vblank */
5575 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5576 if (rdev->irq.crtc_vblank_int[0]) {
5577 drm_handle_vblank(rdev->ddev, 0);
5578 rdev->pm.vblank_sync = true;
5579 wake_up(&rdev->irq.vblank_queue);
5580 }
5581 if (atomic_read(&rdev->irq.pflip[0]))
5582 radeon_crtc_handle_flip(rdev, 0);
5583 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5584 DRM_DEBUG("IH: D1 vblank\n");
5585 }
5586 break;
5587 case 1: /* D1 vline */
5588 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5589 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5590 DRM_DEBUG("IH: D1 vline\n");
5591 }
5592 break;
5593 default:
5594 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5595 break;
5596 }
5597 break;
5598 case 2: /* D2 vblank/vline */
5599 switch (src_data) {
5600 case 0: /* D2 vblank */
5601 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5602 if (rdev->irq.crtc_vblank_int[1]) {
5603 drm_handle_vblank(rdev->ddev, 1);
5604 rdev->pm.vblank_sync = true;
5605 wake_up(&rdev->irq.vblank_queue);
5606 }
5607 if (atomic_read(&rdev->irq.pflip[1]))
5608 radeon_crtc_handle_flip(rdev, 1);
5609 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5610 DRM_DEBUG("IH: D2 vblank\n");
5611 }
5612 break;
5613 case 1: /* D2 vline */
5614 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5615 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5616 DRM_DEBUG("IH: D2 vline\n");
5617 }
5618 break;
5619 default:
5620 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5621 break;
5622 }
5623 break;
5624 case 3: /* D3 vblank/vline */
5625 switch (src_data) {
5626 case 0: /* D3 vblank */
5627 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5628 if (rdev->irq.crtc_vblank_int[2]) {
5629 drm_handle_vblank(rdev->ddev, 2);
5630 rdev->pm.vblank_sync = true;
5631 wake_up(&rdev->irq.vblank_queue);
5632 }
5633 if (atomic_read(&rdev->irq.pflip[2]))
5634 radeon_crtc_handle_flip(rdev, 2);
5635 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5636 DRM_DEBUG("IH: D3 vblank\n");
5637 }
5638 break;
5639 case 1: /* D3 vline */
5640 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5641 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5642 DRM_DEBUG("IH: D3 vline\n");
5643 }
5644 break;
5645 default:
5646 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5647 break;
5648 }
5649 break;
5650 case 4: /* D4 vblank/vline */
5651 switch (src_data) {
5652 case 0: /* D4 vblank */
5653 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5654 if (rdev->irq.crtc_vblank_int[3]) {
5655 drm_handle_vblank(rdev->ddev, 3);
5656 rdev->pm.vblank_sync = true;
5657 wake_up(&rdev->irq.vblank_queue);
5658 }
5659 if (atomic_read(&rdev->irq.pflip[3]))
5660 radeon_crtc_handle_flip(rdev, 3);
5661 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5662 DRM_DEBUG("IH: D4 vblank\n");
5663 }
5664 break;
5665 case 1: /* D4 vline */
5666 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5667 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5668 DRM_DEBUG("IH: D4 vline\n");
5669 }
5670 break;
5671 default:
5672 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5673 break;
5674 }
5675 break;
5676 case 5: /* D5 vblank/vline */
5677 switch (src_data) {
5678 case 0: /* D5 vblank */
5679 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5680 if (rdev->irq.crtc_vblank_int[4]) {
5681 drm_handle_vblank(rdev->ddev, 4);
5682 rdev->pm.vblank_sync = true;
5683 wake_up(&rdev->irq.vblank_queue);
5684 }
5685 if (atomic_read(&rdev->irq.pflip[4]))
5686 radeon_crtc_handle_flip(rdev, 4);
5687 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5688 DRM_DEBUG("IH: D5 vblank\n");
5689 }
5690 break;
5691 case 1: /* D5 vline */
5692 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5693 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5694 DRM_DEBUG("IH: D5 vline\n");
5695 }
5696 break;
5697 default:
5698 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5699 break;
5700 }
5701 break;
5702 case 6: /* D6 vblank/vline */
5703 switch (src_data) {
5704 case 0: /* D6 vblank */
5705 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5706 if (rdev->irq.crtc_vblank_int[5]) {
5707 drm_handle_vblank(rdev->ddev, 5);
5708 rdev->pm.vblank_sync = true;
5709 wake_up(&rdev->irq.vblank_queue);
5710 }
5711 if (atomic_read(&rdev->irq.pflip[5]))
5712 radeon_crtc_handle_flip(rdev, 5);
5713 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5714 DRM_DEBUG("IH: D6 vblank\n");
5715 }
5716 break;
5717 case 1: /* D6 vline */
5718 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5719 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5720 DRM_DEBUG("IH: D6 vline\n");
5721 }
5722 break;
5723 default:
5724 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5725 break;
5726 }
5727 break;
5728 case 42: /* HPD hotplug */
5729 switch (src_data) {
5730 case 0:
5731 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5732 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5733 queue_hotplug = true;
5734 DRM_DEBUG("IH: HPD1\n");
5735 }
5736 break;
5737 case 1:
5738 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5739 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5740 queue_hotplug = true;
5741 DRM_DEBUG("IH: HPD2\n");
5742 }
5743 break;
5744 case 2:
5745 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5746 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5747 queue_hotplug = true;
5748 DRM_DEBUG("IH: HPD3\n");
5749 }
5750 break;
5751 case 3:
5752 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5753 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5754 queue_hotplug = true;
5755 DRM_DEBUG("IH: HPD4\n");
5756 }
5757 break;
5758 case 4:
5759 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5760 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5761 queue_hotplug = true;
5762 DRM_DEBUG("IH: HPD5\n");
5763 }
5764 break;
5765 case 5:
5766 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5767 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5768 queue_hotplug = true;
5769 DRM_DEBUG("IH: HPD6\n");
5770 }
5771 break;
5772 default:
5773 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5774 break;
5775 }
5776 break;
Alex Deucher9d97c992012-09-06 14:24:48 -04005777 case 146:
5778 case 147:
Alex Deucher3ec7d112013-06-14 10:42:22 -04005779 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5780 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5781 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
Alex Deucher9d97c992012-09-06 14:24:48 -04005782 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5783 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04005784 addr);
Alex Deucher9d97c992012-09-06 14:24:48 -04005785 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
Alex Deucher3ec7d112013-06-14 10:42:22 -04005786 status);
5787 cik_vm_decode_fault(rdev, status, addr, mc_client);
Alex Deucher9d97c992012-09-06 14:24:48 -04005788 /* reset addr and status */
5789 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5790 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005791 case 176: /* GFX RB CP_INT */
5792 case 177: /* GFX IB CP_INT */
5793 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5794 break;
5795 case 181: /* CP EOP event */
5796 DRM_DEBUG("IH: CP EOP\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005797 /* XXX check the bitfield order! */
5798 me_id = (ring_id & 0x60) >> 5;
5799 pipe_id = (ring_id & 0x18) >> 3;
5800 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005801 switch (me_id) {
5802 case 0:
5803 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5804 break;
5805 case 1:
Alex Deuchera59781b2012-11-09 10:45:57 -05005806 case 2:
Alex Deucher2b0781a2013-04-09 14:26:16 -04005807 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5808 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5809 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5810 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
Alex Deuchera59781b2012-11-09 10:45:57 -05005811 break;
5812 }
5813 break;
5814 case 184: /* CP Privileged reg access */
5815 DRM_ERROR("Illegal register access in command stream\n");
5816 /* XXX check the bitfield order! */
5817 me_id = (ring_id & 0x60) >> 5;
5818 pipe_id = (ring_id & 0x18) >> 3;
5819 queue_id = (ring_id & 0x7) >> 0;
5820 switch (me_id) {
5821 case 0:
5822 /* This results in a full GPU reset, but all we need to do is soft
5823 * reset the CP for gfx
5824 */
5825 queue_reset = true;
5826 break;
5827 case 1:
5828 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005829 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005830 break;
5831 case 2:
5832 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005833 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005834 break;
5835 }
5836 break;
5837 case 185: /* CP Privileged inst */
5838 DRM_ERROR("Illegal instruction in command stream\n");
Alex Deucher21a93e12013-04-09 12:47:11 -04005839 /* XXX check the bitfield order! */
5840 me_id = (ring_id & 0x60) >> 5;
5841 pipe_id = (ring_id & 0x18) >> 3;
5842 queue_id = (ring_id & 0x7) >> 0;
Alex Deuchera59781b2012-11-09 10:45:57 -05005843 switch (me_id) {
5844 case 0:
5845 /* This results in a full GPU reset, but all we need to do is soft
5846 * reset the CP for gfx
5847 */
5848 queue_reset = true;
5849 break;
5850 case 1:
5851 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005852 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005853 break;
5854 case 2:
5855 /* XXX compute */
Alex Deucher2b0781a2013-04-09 14:26:16 -04005856 queue_reset = true;
Alex Deuchera59781b2012-11-09 10:45:57 -05005857 break;
5858 }
5859 break;
Alex Deucher21a93e12013-04-09 12:47:11 -04005860 case 224: /* SDMA trap event */
5861 /* XXX check the bitfield order! */
5862 me_id = (ring_id & 0x3) >> 0;
5863 queue_id = (ring_id & 0xc) >> 2;
5864 DRM_DEBUG("IH: SDMA trap\n");
5865 switch (me_id) {
5866 case 0:
5867 switch (queue_id) {
5868 case 0:
5869 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5870 break;
5871 case 1:
5872 /* XXX compute */
5873 break;
5874 case 2:
5875 /* XXX compute */
5876 break;
5877 }
5878 break;
5879 case 1:
5880 switch (queue_id) {
5881 case 0:
5882 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5883 break;
5884 case 1:
5885 /* XXX compute */
5886 break;
5887 case 2:
5888 /* XXX compute */
5889 break;
5890 }
5891 break;
5892 }
5893 break;
5894 case 241: /* SDMA Privileged inst */
5895 case 247: /* SDMA Privileged inst */
5896 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5897 /* XXX check the bitfield order! */
5898 me_id = (ring_id & 0x3) >> 0;
5899 queue_id = (ring_id & 0xc) >> 2;
5900 switch (me_id) {
5901 case 0:
5902 switch (queue_id) {
5903 case 0:
5904 queue_reset = true;
5905 break;
5906 case 1:
5907 /* XXX compute */
5908 queue_reset = true;
5909 break;
5910 case 2:
5911 /* XXX compute */
5912 queue_reset = true;
5913 break;
5914 }
5915 break;
5916 case 1:
5917 switch (queue_id) {
5918 case 0:
5919 queue_reset = true;
5920 break;
5921 case 1:
5922 /* XXX compute */
5923 queue_reset = true;
5924 break;
5925 case 2:
5926 /* XXX compute */
5927 queue_reset = true;
5928 break;
5929 }
5930 break;
5931 }
5932 break;
Alex Deuchera59781b2012-11-09 10:45:57 -05005933 case 233: /* GUI IDLE */
5934 DRM_DEBUG("IH: GUI idle\n");
5935 break;
5936 default:
5937 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5938 break;
5939 }
5940
5941 /* wptr/rptr are in bytes! */
5942 rptr += 16;
5943 rptr &= rdev->ih.ptr_mask;
5944 }
5945 if (queue_hotplug)
5946 schedule_work(&rdev->hotplug_work);
5947 if (queue_reset)
5948 schedule_work(&rdev->reset_work);
5949 rdev->ih.rptr = rptr;
5950 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5951 atomic_set(&rdev->ih.lock, 0);
5952
5953 /* make sure wptr hasn't changed while processing */
5954 wptr = cik_get_ih_wptr(rdev);
5955 if (wptr != rptr)
5956 goto restart_ih;
5957
5958 return IRQ_HANDLED;
5959}
Alex Deucher7bf94a22012-08-17 11:48:29 -04005960
5961/*
5962 * startup/shutdown callbacks
5963 */
5964/**
5965 * cik_startup - program the asic to a functional state
5966 *
5967 * @rdev: radeon_device pointer
5968 *
5969 * Programs the asic to a functional state (CIK).
5970 * Called by cik_init() and cik_resume().
5971 * Returns 0 for success, error for failure.
5972 */
5973static int cik_startup(struct radeon_device *rdev)
5974{
5975 struct radeon_ring *ring;
5976 int r;
5977
Alex Deucher8a7cd272013-08-06 11:29:39 -04005978 /* enable pcie gen2/3 link */
5979 cik_pcie_gen3_enable(rdev);
Alex Deucher7235711a42013-04-04 13:58:09 -04005980 /* enable aspm */
5981 cik_program_aspm(rdev);
Alex Deucher8a7cd272013-08-06 11:29:39 -04005982
Alex Deucher6fab3feb2013-08-04 12:13:17 -04005983 cik_mc_program(rdev);
5984
Alex Deucher7bf94a22012-08-17 11:48:29 -04005985 if (rdev->flags & RADEON_IS_IGP) {
5986 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5987 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5988 r = cik_init_microcode(rdev);
5989 if (r) {
5990 DRM_ERROR("Failed to load firmware!\n");
5991 return r;
5992 }
5993 }
5994 } else {
5995 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5996 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5997 !rdev->mc_fw) {
5998 r = cik_init_microcode(rdev);
5999 if (r) {
6000 DRM_ERROR("Failed to load firmware!\n");
6001 return r;
6002 }
6003 }
6004
6005 r = ci_mc_load_microcode(rdev);
6006 if (r) {
6007 DRM_ERROR("Failed to load MC firmware!\n");
6008 return r;
6009 }
6010 }
6011
6012 r = r600_vram_scratch_init(rdev);
6013 if (r)
6014 return r;
6015
Alex Deucher7bf94a22012-08-17 11:48:29 -04006016 r = cik_pcie_gart_enable(rdev);
6017 if (r)
6018 return r;
6019 cik_gpu_init(rdev);
6020
6021 /* allocate rlc buffers */
6022 r = si_rlc_init(rdev);
6023 if (r) {
6024 DRM_ERROR("Failed to init rlc BOs!\n");
6025 return r;
6026 }
6027
6028 /* allocate wb buffer */
6029 r = radeon_wb_init(rdev);
6030 if (r)
6031 return r;
6032
Alex Deucher963e81f2013-06-26 17:37:11 -04006033 /* allocate mec buffers */
6034 r = cik_mec_init(rdev);
6035 if (r) {
6036 DRM_ERROR("Failed to init MEC BOs!\n");
6037 return r;
6038 }
6039
Alex Deucher7bf94a22012-08-17 11:48:29 -04006040 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6041 if (r) {
6042 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6043 return r;
6044 }
6045
Alex Deucher963e81f2013-06-26 17:37:11 -04006046 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6047 if (r) {
6048 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6049 return r;
6050 }
6051
6052 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6053 if (r) {
6054 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6055 return r;
6056 }
6057
Alex Deucher7bf94a22012-08-17 11:48:29 -04006058 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6059 if (r) {
6060 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6061 return r;
6062 }
6063
6064 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6065 if (r) {
6066 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6067 return r;
6068 }
6069
Christian König87167bb2013-04-09 13:39:21 -04006070 r = cik_uvd_resume(rdev);
6071 if (!r) {
6072 r = radeon_fence_driver_start_ring(rdev,
6073 R600_RING_TYPE_UVD_INDEX);
6074 if (r)
6075 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6076 }
6077 if (r)
6078 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6079
Alex Deucher7bf94a22012-08-17 11:48:29 -04006080 /* Enable IRQ */
6081 if (!rdev->irq.installed) {
6082 r = radeon_irq_kms_init(rdev);
6083 if (r)
6084 return r;
6085 }
6086
6087 r = cik_irq_init(rdev);
6088 if (r) {
6089 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6090 radeon_irq_kms_fini(rdev);
6091 return r;
6092 }
6093 cik_irq_set(rdev);
6094
6095 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6096 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6097 CP_RB0_RPTR, CP_RB0_WPTR,
6098 0, 0xfffff, RADEON_CP_PACKET2);
6099 if (r)
6100 return r;
6101
Alex Deucher963e81f2013-06-26 17:37:11 -04006102 /* set up the compute queues */
Alex Deucher2615b532013-06-03 11:21:58 -04006103 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006104 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6105 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6106 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006107 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006108 if (r)
6109 return r;
6110 ring->me = 1; /* first MEC */
6111 ring->pipe = 0; /* first pipe */
6112 ring->queue = 0; /* first queue */
6113 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6114
Alex Deucher2615b532013-06-03 11:21:58 -04006115 /* type-2 packets are deprecated on MEC, use type-3 instead */
Alex Deucher963e81f2013-06-26 17:37:11 -04006116 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6117 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6118 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
Alex Deucher2615b532013-06-03 11:21:58 -04006119 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
Alex Deucher963e81f2013-06-26 17:37:11 -04006120 if (r)
6121 return r;
6122 /* dGPUs only have 1 MEC */
6123 ring->me = 1; /* first MEC */
6124 ring->pipe = 0; /* first pipe */
6125 ring->queue = 1; /* second queue */
6126 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6127
Alex Deucher7bf94a22012-08-17 11:48:29 -04006128 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6129 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6130 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6131 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6132 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6133 if (r)
6134 return r;
6135
6136 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6137 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6138 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6139 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6140 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6141 if (r)
6142 return r;
6143
6144 r = cik_cp_resume(rdev);
6145 if (r)
6146 return r;
6147
6148 r = cik_sdma_resume(rdev);
6149 if (r)
6150 return r;
6151
Christian König87167bb2013-04-09 13:39:21 -04006152 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6153 if (ring->ring_size) {
6154 r = radeon_ring_init(rdev, ring, ring->ring_size,
6155 R600_WB_UVD_RPTR_OFFSET,
6156 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6157 0, 0xfffff, RADEON_CP_PACKET2);
6158 if (!r)
6159 r = r600_uvd_init(rdev);
6160 if (r)
6161 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6162 }
6163
Alex Deucher7bf94a22012-08-17 11:48:29 -04006164 r = radeon_ib_pool_init(rdev);
6165 if (r) {
6166 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6167 return r;
6168 }
6169
6170 r = radeon_vm_manager_init(rdev);
6171 if (r) {
6172 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6173 return r;
6174 }
6175
6176 return 0;
6177}
6178
6179/**
6180 * cik_resume - resume the asic to a functional state
6181 *
6182 * @rdev: radeon_device pointer
6183 *
6184 * Programs the asic to a functional state (CIK).
6185 * Called at resume.
6186 * Returns 0 for success, error for failure.
6187 */
6188int cik_resume(struct radeon_device *rdev)
6189{
6190 int r;
6191
6192 /* post card */
6193 atom_asic_init(rdev->mode_info.atom_context);
6194
Alex Deucher0aafd312013-04-09 14:43:30 -04006195 /* init golden registers */
6196 cik_init_golden_registers(rdev);
6197
Alex Deucher7bf94a22012-08-17 11:48:29 -04006198 rdev->accel_working = true;
6199 r = cik_startup(rdev);
6200 if (r) {
6201 DRM_ERROR("cik startup failed on resume\n");
6202 rdev->accel_working = false;
6203 return r;
6204 }
6205
6206 return r;
6207
6208}
6209
6210/**
6211 * cik_suspend - suspend the asic
6212 *
6213 * @rdev: radeon_device pointer
6214 *
6215 * Bring the chip into a state suitable for suspend (CIK).
6216 * Called at suspend.
6217 * Returns 0 for success.
6218 */
6219int cik_suspend(struct radeon_device *rdev)
6220{
6221 radeon_vm_manager_fini(rdev);
6222 cik_cp_enable(rdev, false);
6223 cik_sdma_enable(rdev, false);
Christian König2858c002013-08-01 17:34:07 +02006224 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006225 radeon_uvd_suspend(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006226 cik_irq_suspend(rdev);
6227 radeon_wb_disable(rdev);
6228 cik_pcie_gart_disable(rdev);
6229 return 0;
6230}
6231
6232/* The plan is to move initialization into this function and use
6233 * helper functions so that radeon_device_init does pretty much
6234 * nothing more than call asic specific functions. This
6235 * should also allow us to remove a bunch of callback functions
6236 * like vram_info.
6237 */
6238/**
6239 * cik_init - asic specific driver and hw init
6240 *
6241 * @rdev: radeon_device pointer
6242 *
6243 * Setup asic specific driver variables and program the hw
6244 * to a functional state (CIK).
6245 * Called at driver startup.
6246 * Returns 0 for success, errors for failure.
6247 */
6248int cik_init(struct radeon_device *rdev)
6249{
6250 struct radeon_ring *ring;
6251 int r;
6252
6253 /* Read BIOS */
6254 if (!radeon_get_bios(rdev)) {
6255 if (ASIC_IS_AVIVO(rdev))
6256 return -EINVAL;
6257 }
6258 /* Must be an ATOMBIOS */
6259 if (!rdev->is_atom_bios) {
6260 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
6261 return -EINVAL;
6262 }
6263 r = radeon_atombios_init(rdev);
6264 if (r)
6265 return r;
6266
6267 /* Post card if necessary */
6268 if (!radeon_card_posted(rdev)) {
6269 if (!rdev->bios) {
6270 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6271 return -EINVAL;
6272 }
6273 DRM_INFO("GPU not posted. posting now...\n");
6274 atom_asic_init(rdev->mode_info.atom_context);
6275 }
Alex Deucher0aafd312013-04-09 14:43:30 -04006276 /* init golden registers */
6277 cik_init_golden_registers(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006278 /* Initialize scratch registers */
6279 cik_scratch_init(rdev);
6280 /* Initialize surface registers */
6281 radeon_surface_init(rdev);
6282 /* Initialize clocks */
6283 radeon_get_clock_info(rdev->ddev);
6284
6285 /* Fence driver */
6286 r = radeon_fence_driver_init(rdev);
6287 if (r)
6288 return r;
6289
6290 /* initialize memory controller */
6291 r = cik_mc_init(rdev);
6292 if (r)
6293 return r;
6294 /* Memory manager */
6295 r = radeon_bo_init(rdev);
6296 if (r)
6297 return r;
6298
6299 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6300 ring->ring_obj = NULL;
6301 r600_ring_init(rdev, ring, 1024 * 1024);
6302
Alex Deucher963e81f2013-06-26 17:37:11 -04006303 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6304 ring->ring_obj = NULL;
6305 r600_ring_init(rdev, ring, 1024 * 1024);
6306 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6307 if (r)
6308 return r;
6309
6310 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6311 ring->ring_obj = NULL;
6312 r600_ring_init(rdev, ring, 1024 * 1024);
6313 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6314 if (r)
6315 return r;
6316
Alex Deucher7bf94a22012-08-17 11:48:29 -04006317 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6318 ring->ring_obj = NULL;
6319 r600_ring_init(rdev, ring, 256 * 1024);
6320
6321 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6322 ring->ring_obj = NULL;
6323 r600_ring_init(rdev, ring, 256 * 1024);
6324
Christian König87167bb2013-04-09 13:39:21 -04006325 r = radeon_uvd_init(rdev);
6326 if (!r) {
6327 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6328 ring->ring_obj = NULL;
6329 r600_ring_init(rdev, ring, 4096);
6330 }
6331
Alex Deucher7bf94a22012-08-17 11:48:29 -04006332 rdev->ih.ring_obj = NULL;
6333 r600_ih_ring_init(rdev, 64 * 1024);
6334
6335 r = r600_pcie_gart_init(rdev);
6336 if (r)
6337 return r;
6338
6339 rdev->accel_working = true;
6340 r = cik_startup(rdev);
6341 if (r) {
6342 dev_err(rdev->dev, "disabling GPU acceleration\n");
6343 cik_cp_fini(rdev);
6344 cik_sdma_fini(rdev);
6345 cik_irq_fini(rdev);
6346 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006347 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006348 radeon_wb_fini(rdev);
6349 radeon_ib_pool_fini(rdev);
6350 radeon_vm_manager_fini(rdev);
6351 radeon_irq_kms_fini(rdev);
6352 cik_pcie_gart_fini(rdev);
6353 rdev->accel_working = false;
6354 }
6355
6356 /* Don't start up if the MC ucode is missing.
6357 * The default clocks and voltages before the MC ucode
6358 * is loaded are not sufficient for advanced operations.
6359 */
6360 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6361 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6362 return -EINVAL;
6363 }
6364
6365 return 0;
6366}
6367
6368/**
6369 * cik_fini - asic specific driver and hw fini
6370 *
6371 * @rdev: radeon_device pointer
6372 *
6373 * Tear down the asic specific driver variables and program the hw
6374 * to an idle state (CIK).
6375 * Called at driver unload.
6376 */
6377void cik_fini(struct radeon_device *rdev)
6378{
6379 cik_cp_fini(rdev);
6380 cik_sdma_fini(rdev);
6381 cik_irq_fini(rdev);
6382 si_rlc_fini(rdev);
Alex Deucher963e81f2013-06-26 17:37:11 -04006383 cik_mec_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006384 radeon_wb_fini(rdev);
6385 radeon_vm_manager_fini(rdev);
6386 radeon_ib_pool_fini(rdev);
6387 radeon_irq_kms_fini(rdev);
Christian König2858c002013-08-01 17:34:07 +02006388 r600_uvd_stop(rdev);
Christian König87167bb2013-04-09 13:39:21 -04006389 radeon_uvd_fini(rdev);
Alex Deucher7bf94a22012-08-17 11:48:29 -04006390 cik_pcie_gart_fini(rdev);
6391 r600_vram_scratch_fini(rdev);
6392 radeon_gem_fini(rdev);
6393 radeon_fence_driver_fini(rdev);
6394 radeon_bo_fini(rdev);
6395 radeon_atombios_fini(rdev);
6396 kfree(rdev->bios);
6397 rdev->bios = NULL;
6398}
Alex Deuchercd84a272012-07-20 17:13:13 -04006399
6400/* display watermark setup */
6401/**
6402 * dce8_line_buffer_adjust - Set up the line buffer
6403 *
6404 * @rdev: radeon_device pointer
6405 * @radeon_crtc: the selected display controller
6406 * @mode: the current display mode on the selected display
6407 * controller
6408 *
6409 * Set up the line buffer allocation for
6410 * the selected display controller (CIK).
6411 * Returns the line buffer size in pixels.
6412 */
6413static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6414 struct radeon_crtc *radeon_crtc,
6415 struct drm_display_mode *mode)
6416{
6417 u32 tmp;
6418
6419 /*
6420 * Line Buffer Setup
6421 * There are 6 line buffers, one for each display controller.
6422 * There are 3 partitions per LB. Select the number of partitions
6423 * to enable based on the display width. For display widths larger
6424 * than 4096, you need to use 2 display controllers and combine
6425 * them using the stereo blender.
6426 */
6427 if (radeon_crtc->base.enabled && mode) {
6428 if (mode->crtc_hdisplay < 1920)
6429 tmp = 1;
6430 else if (mode->crtc_hdisplay < 2560)
6431 tmp = 2;
6432 else if (mode->crtc_hdisplay < 4096)
6433 tmp = 0;
6434 else {
6435 DRM_DEBUG_KMS("Mode too big for LB!\n");
6436 tmp = 0;
6437 }
6438 } else
6439 tmp = 1;
6440
6441 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6442 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6443
6444 if (radeon_crtc->base.enabled && mode) {
6445 switch (tmp) {
6446 case 0:
6447 default:
6448 return 4096 * 2;
6449 case 1:
6450 return 1920 * 2;
6451 case 2:
6452 return 2560 * 2;
6453 }
6454 }
6455
6456 /* controller not enabled, so no lb used */
6457 return 0;
6458}
6459
6460/**
6461 * cik_get_number_of_dram_channels - get the number of dram channels
6462 *
6463 * @rdev: radeon_device pointer
6464 *
6465 * Look up the number of video ram channels (CIK).
6466 * Used for display watermark bandwidth calculations
6467 * Returns the number of dram channels
6468 */
6469static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6470{
6471 u32 tmp = RREG32(MC_SHARED_CHMAP);
6472
6473 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6474 case 0:
6475 default:
6476 return 1;
6477 case 1:
6478 return 2;
6479 case 2:
6480 return 4;
6481 case 3:
6482 return 8;
6483 case 4:
6484 return 3;
6485 case 5:
6486 return 6;
6487 case 6:
6488 return 10;
6489 case 7:
6490 return 12;
6491 case 8:
6492 return 16;
6493 }
6494}
6495
6496struct dce8_wm_params {
6497 u32 dram_channels; /* number of dram channels */
6498 u32 yclk; /* bandwidth per dram data pin in kHz */
6499 u32 sclk; /* engine clock in kHz */
6500 u32 disp_clk; /* display clock in kHz */
6501 u32 src_width; /* viewport width */
6502 u32 active_time; /* active display time in ns */
6503 u32 blank_time; /* blank time in ns */
6504 bool interlaced; /* mode is interlaced */
6505 fixed20_12 vsc; /* vertical scale ratio */
6506 u32 num_heads; /* number of active crtcs */
6507 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6508 u32 lb_size; /* line buffer allocated to pipe */
6509 u32 vtaps; /* vertical scaler taps */
6510};
6511
6512/**
6513 * dce8_dram_bandwidth - get the dram bandwidth
6514 *
6515 * @wm: watermark calculation data
6516 *
6517 * Calculate the raw dram bandwidth (CIK).
6518 * Used for display watermark bandwidth calculations
6519 * Returns the dram bandwidth in MBytes/s
6520 */
6521static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6522{
6523 /* Calculate raw DRAM Bandwidth */
6524 fixed20_12 dram_efficiency; /* 0.7 */
6525 fixed20_12 yclk, dram_channels, bandwidth;
6526 fixed20_12 a;
6527
6528 a.full = dfixed_const(1000);
6529 yclk.full = dfixed_const(wm->yclk);
6530 yclk.full = dfixed_div(yclk, a);
6531 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6532 a.full = dfixed_const(10);
6533 dram_efficiency.full = dfixed_const(7);
6534 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6535 bandwidth.full = dfixed_mul(dram_channels, yclk);
6536 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6537
6538 return dfixed_trunc(bandwidth);
6539}
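
/* Worked example for dce8_dram_bandwidth() (illustrative numbers only):
 * with yclk = 1,000,000 kHz (1 GHz effective per pin) and 2 dram channels,
 * bandwidth = (1,000,000 / 1000) * (2 * 4) * 0.7 = 5600 MBytes/s.
 */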
6540
6541/**
6542 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6543 *
6544 * @wm: watermark calculation data
6545 *
6546 * Calculate the dram bandwidth used for display (CIK).
6547 * Used for display watermark bandwidth calculations
6548 * Returns the dram bandwidth for display in MBytes/s
6549 */
6550static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6551{
6552 /* Calculate DRAM Bandwidth and the part allocated to display. */
6553 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6554 fixed20_12 yclk, dram_channels, bandwidth;
6555 fixed20_12 a;
6556
6557 a.full = dfixed_const(1000);
6558 yclk.full = dfixed_const(wm->yclk);
6559 yclk.full = dfixed_div(yclk, a);
6560 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6561 a.full = dfixed_const(10);
6562 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6563 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6564 bandwidth.full = dfixed_mul(dram_channels, yclk);
6565 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6566
6567 return dfixed_trunc(bandwidth);
6568}
6569
6570/**
6571 * dce8_data_return_bandwidth - get the data return bandwidth
6572 *
6573 * @wm: watermark calculation data
6574 *
6575 * Calculate the data return bandwidth used for display (CIK).
6576 * Used for display watermark bandwidth calculations
6577 * Returns the data return bandwidth in MBytes/s
6578 */
6579static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6580{
6581 /* Calculate the display Data return Bandwidth */
6582 fixed20_12 return_efficiency; /* 0.8 */
6583 fixed20_12 sclk, bandwidth;
6584 fixed20_12 a;
6585
6586 a.full = dfixed_const(1000);
6587 sclk.full = dfixed_const(wm->sclk);
6588 sclk.full = dfixed_div(sclk, a);
6589 a.full = dfixed_const(10);
6590 return_efficiency.full = dfixed_const(8);
6591 return_efficiency.full = dfixed_div(return_efficiency, a);
6592 a.full = dfixed_const(32);
6593 bandwidth.full = dfixed_mul(a, sclk);
6594 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6595
6596 return dfixed_trunc(bandwidth);
6597}
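
/* Worked example for dce8_data_return_bandwidth() (illustrative numbers only):
 * with an engine clock of sclk = 800,000 kHz (800 MHz),
 * bandwidth = 32 * (800,000 / 1000) * 0.8 = 20480 MBytes/s.
 */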
6598
6599/**
6600 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6601 *
6602 * @wm: watermark calculation data
6603 *
6604 * Calculate the dmif bandwidth used for display (CIK).
6605 * Used for display watermark bandwidth calculations
6606 * Returns the dmif bandwidth in MBytes/s
6607 */
6608static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6609{
6610 /* Calculate the DMIF Request Bandwidth */
6611 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6612 fixed20_12 disp_clk, bandwidth;
6613 fixed20_12 a, b;
6614
6615 a.full = dfixed_const(1000);
6616 disp_clk.full = dfixed_const(wm->disp_clk);
6617 disp_clk.full = dfixed_div(disp_clk, a);
6618 a.full = dfixed_const(32);
6619 b.full = dfixed_mul(a, disp_clk);
6620
6621 a.full = dfixed_const(10);
6622 disp_clk_request_efficiency.full = dfixed_const(8);
6623 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6624
6625 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6626
6627 return dfixed_trunc(bandwidth);
6628}
6629
6630/**
6631 * dce8_available_bandwidth - get the min available bandwidth
6632 *
6633 * @wm: watermark calculation data
6634 *
6635 * Calculate the min available bandwidth used for display (CIK).
6636 * Used for display watermark bandwidth calculations
6637 * Returns the min available bandwidth in MBytes/s
6638 */
6639static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6640{
6641 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
6642 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6643 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6644 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6645
6646 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6647}
6648
6649/**
6650 * dce8_average_bandwidth - get the average available bandwidth
6651 *
6652 * @wm: watermark calculation data
6653 *
6654 * Calculate the average available bandwidth used for display (CIK).
6655 * Used for display watermark bandwidth calculations
6656 * Returns the average available bandwidth in MBytes/s
6657 */
6658static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6659{
6660 /* Calculate the display mode Average Bandwidth
6661 * DisplayMode should contain the source and destination dimensions,
6662 * timing, etc.
6663 */
6664 fixed20_12 bpp;
6665 fixed20_12 line_time;
6666 fixed20_12 src_width;
6667 fixed20_12 bandwidth;
6668 fixed20_12 a;
6669
6670 a.full = dfixed_const(1000);
6671 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6672 line_time.full = dfixed_div(line_time, a);
6673 bpp.full = dfixed_const(wm->bytes_per_pixel);
6674 src_width.full = dfixed_const(wm->src_width);
6675 bandwidth.full = dfixed_mul(src_width, bpp);
6676 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6677 bandwidth.full = dfixed_div(bandwidth, line_time);
6678
6679 return dfixed_trunc(bandwidth);
6680}
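
/* Worked example for dce8_average_bandwidth() (illustrative numbers only):
 * for a 1920 pixel wide source at 4 bytes per pixel, vsc = 1.0, and a total
 * line time (active + blank) of 15,000 ns:
 * bandwidth = 1920 * 4 * 1.0 / (15,000 / 1000) = 512 MBytes/s.
 */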
6681
6682/**
6683 * dce8_latency_watermark - get the latency watermark
6684 *
6685 * @wm: watermark calculation data
6686 *
6687 * Calculate the latency watermark (CIK).
6688 * Used for display watermark bandwidth calculations
6689 * Returns the latency watermark in ns
6690 */
6691static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6692{
6693 /* First calculate the latency in ns */
6694 u32 mc_latency = 2000; /* 2000 ns. */
6695 u32 available_bandwidth = dce8_available_bandwidth(wm);
6696 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6697 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6698 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6699 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6700 (wm->num_heads * cursor_line_pair_return_time);
6701 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6702 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6703 u32 tmp, dmif_size = 12288;
6704 fixed20_12 a, b, c;
6705
6706 if (wm->num_heads == 0)
6707 return 0;
6708
6709 a.full = dfixed_const(2);
6710 b.full = dfixed_const(1);
6711 if ((wm->vsc.full > a.full) ||
6712 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6713 (wm->vtaps >= 5) ||
6714 ((wm->vsc.full >= a.full) && wm->interlaced))
6715 max_src_lines_per_dst_line = 4;
6716 else
6717 max_src_lines_per_dst_line = 2;
6718
6719 a.full = dfixed_const(available_bandwidth);
6720 b.full = dfixed_const(wm->num_heads);
6721 a.full = dfixed_div(a, b);
6722
6723 b.full = dfixed_const(mc_latency + 512);
6724 c.full = dfixed_const(wm->disp_clk);
6725 b.full = dfixed_div(b, c);
6726
6727 c.full = dfixed_const(dmif_size);
6728 b.full = dfixed_div(c, b);
6729
6730 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6731
6732 b.full = dfixed_const(1000);
6733 c.full = dfixed_const(wm->disp_clk);
6734 b.full = dfixed_div(c, b);
6735 c.full = dfixed_const(wm->bytes_per_pixel);
6736 b.full = dfixed_mul(b, c);
6737
6738 lb_fill_bw = min(tmp, dfixed_trunc(b));
6739
6740 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6741 b.full = dfixed_const(1000);
6742 c.full = dfixed_const(lb_fill_bw);
6743 b.full = dfixed_div(c, b);
6744 a.full = dfixed_div(a, b);
6745 line_fill_time = dfixed_trunc(a);
6746
6747 if (line_fill_time < wm->active_time)
6748 return latency;
6749 else
6750 return latency + (line_fill_time - wm->active_time);
6751
6752}
6753
6754/**
6755 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6756 * average bandwidth against available dram bandwidth
6757 *
6758 * @wm: watermark calculation data
6759 *
6760 * Check if the display average bandwidth fits in the display
6761 * dram bandwidth (CIK).
6762 * Used for display watermark bandwidth calculations
6763 * Returns true if the display fits, false if not.
6764 */
6765static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6766{
6767 if (dce8_average_bandwidth(wm) <=
6768 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6769 return true;
6770 else
6771 return false;
6772}
6773
6774/**
6775 * dce8_average_bandwidth_vs_available_bandwidth - check
6776 * average and available bandwidth
6777 * average bandwidth against available bandwidth
6778 * @wm: watermark calculation data
6779 *
6780 * Check if the display average bandwidth fits in the display
6781 * available bandwidth (CIK).
6782 * Used for display watermark bandwidth calculations
6783 * Returns true if the display fits, false if not.
6784 */
6785static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6786{
6787 if (dce8_average_bandwidth(wm) <=
6788 (dce8_available_bandwidth(wm) / wm->num_heads))
6789 return true;
6790 else
6791 return false;
6792}
6793
6794/**
6795 * dce8_check_latency_hiding - check latency hiding
6796 *
6797 * @wm: watermark calculation data
6798 *
6799 * Check latency hiding (CIK).
6800 * Used for display watermark bandwidth calculations
6801 * Returns true if the display fits, false if not.
6802 */
6803static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6804{
6805 u32 lb_partitions = wm->lb_size / wm->src_width;
6806 u32 line_time = wm->active_time + wm->blank_time;
6807 u32 latency_tolerant_lines;
6808 u32 latency_hiding;
6809 fixed20_12 a;
6810
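	/* when downscaling (vsc > 1) only one line of latency tolerance is
	 * assumed; otherwise it depends on how many whole source lines fit
	 * in the line buffer relative to the vertical tap count.
	 */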
6811 a.full = dfixed_const(1);
6812 if (wm->vsc.full > a.full)
6813 latency_tolerant_lines = 1;
6814 else {
6815 if (lb_partitions <= (wm->vtaps + 1))
6816 latency_tolerant_lines = 1;
6817 else
6818 latency_tolerant_lines = 2;
6819 }
6820
6821 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6822
6823 if (dce8_latency_watermark(wm) <= latency_hiding)
6824 return true;
6825 else
6826 return false;
6827}
6828
6829/**
6830 * dce8_program_watermarks - program display watermarks
6831 *
6832 * @rdev: radeon_device pointer
6833 * @radeon_crtc: the selected display controller
6834 * @lb_size: line buffer size
6835 * @num_heads: number of display controllers in use
6836 *
6837 * Calculate and program the display watermarks for the
6838 * selected display controller (CIK).
6839 */
6840static void dce8_program_watermarks(struct radeon_device *rdev,
6841 struct radeon_crtc *radeon_crtc,
6842 u32 lb_size, u32 num_heads)
6843{
6844 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6845	struct dce8_wm_params wm_low, wm_high;
6846	u32 pixel_period;
6847 u32 line_time = 0;
6848 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6849 u32 tmp, wm_mask;
6850
6851 if (radeon_crtc->base.enabled && num_heads && mode) {
6852 pixel_period = 1000000 / (u32)mode->clock;
6853 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6854
6855		/* watermark for high clocks */
6856 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
6857 rdev->pm.dpm_enabled) {
6858 wm_high.yclk =
6859 radeon_dpm_get_mclk(rdev, false) * 10;
6860 wm_high.sclk =
6861 radeon_dpm_get_sclk(rdev, false) * 10;
6862 } else {
6863 wm_high.yclk = rdev->pm.current_mclk * 10;
6864 wm_high.sclk = rdev->pm.current_sclk * 10;
6865 }
6866
6867 wm_high.disp_clk = mode->clock;
6868 wm_high.src_width = mode->crtc_hdisplay;
6869 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
6870 wm_high.blank_time = line_time - wm_high.active_time;
6871 wm_high.interlaced = false;
6872		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6873			wm_high.interlaced = true;
6874 wm_high.vsc = radeon_crtc->vsc;
6875 wm_high.vtaps = 1;
6876		if (radeon_crtc->rmx_type != RMX_OFF)
6877			wm_high.vtaps = 2;
6878 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
6879 wm_high.lb_size = lb_size;
6880 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
6881 wm_high.num_heads = num_heads;
6882
6883 /* set for high clocks */
6884		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
6885
6886 /* possibly force display priority to high */
6887 /* should really do this at mode validation time... */
6888		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
6889 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
6890 !dce8_check_latency_hiding(&wm_high) ||
6891 (rdev->disp_priority == 2)) {
6892 DRM_DEBUG_KMS("force priority to high\n");
6893 }
6894
6895 /* watermark for low clocks */
6896 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
6897 rdev->pm.dpm_enabled) {
6898 wm_low.yclk =
6899 radeon_dpm_get_mclk(rdev, true) * 10;
6900 wm_low.sclk =
6901 radeon_dpm_get_sclk(rdev, true) * 10;
6902 } else {
6903 wm_low.yclk = rdev->pm.current_mclk * 10;
6904 wm_low.sclk = rdev->pm.current_sclk * 10;
6905 }
6906
6907 wm_low.disp_clk = mode->clock;
6908 wm_low.src_width = mode->crtc_hdisplay;
6909 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
6910 wm_low.blank_time = line_time - wm_low.active_time;
6911 wm_low.interlaced = false;
6912 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6913 wm_low.interlaced = true;
6914 wm_low.vsc = radeon_crtc->vsc;
6915 wm_low.vtaps = 1;
6916 if (radeon_crtc->rmx_type != RMX_OFF)
6917 wm_low.vtaps = 2;
6918 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
6919 wm_low.lb_size = lb_size;
6920 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
6921 wm_low.num_heads = num_heads;
6922
6923 /* set for low clocks */
6924 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
6925
6926 /* possibly force display priority to high */
6927 /* should really do this at mode validation time... */
6928 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
6929 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
6930 !dce8_check_latency_hiding(&wm_low) ||
6931		    (rdev->disp_priority == 2)) {
6932 DRM_DEBUG_KMS("force priority to high\n");
6933 }
6934 }
6935
6936 /* select wm A */
6937 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6938 tmp = wm_mask;
6939 tmp &= ~LATENCY_WATERMARK_MASK(3);
6940 tmp |= LATENCY_WATERMARK_MASK(1);
6941 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6942 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6943 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6944 LATENCY_HIGH_WATERMARK(line_time)));
6945 /* select wm B */
6946 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6947 tmp &= ~LATENCY_WATERMARK_MASK(3);
6948 tmp |= LATENCY_WATERMARK_MASK(2);
6949 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6950 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6951 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6952 LATENCY_HIGH_WATERMARK(line_time)));
6953 /* restore original selection */
6954 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6955
6956 /* save values for DPM */
6957 radeon_crtc->line_time = line_time;
6958 radeon_crtc->wm_high = latency_watermark_a;
6959 radeon_crtc->wm_low = latency_watermark_b;
6960}
6961
6962/**
6963 * dce8_bandwidth_update - program display watermarks
6964 *
6965 * @rdev: radeon_device pointer
6966 *
6967 * Calculate and program the display watermarks and line
6968 * buffer allocation (CIK).
6969 */
6970void dce8_bandwidth_update(struct radeon_device *rdev)
6971{
6972 struct drm_display_mode *mode = NULL;
6973 u32 num_heads = 0, lb_size;
6974 int i;
6975
6976 radeon_update_display_priority(rdev);
6977
6978 for (i = 0; i < rdev->num_crtc; i++) {
6979 if (rdev->mode_info.crtcs[i]->base.enabled)
6980 num_heads++;
6981 }
6982 for (i = 0; i < rdev->num_crtc; i++) {
6983 mode = &rdev->mode_info.crtcs[i]->base.mode;
6984 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6985 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6986 }
6987}
6988
6989/**
6990 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6991 *
6992 * @rdev: radeon_device pointer
6993 *
6994 * Fetches a GPU clock counter snapshot (CIK).
6995 * Returns the 64 bit clock counter snapshot.
6996 */
6997uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6998{
6999 uint64_t clock;
7000
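	/* latch the counter, then read both 32-bit halves under the mutex
	 * so the snapshot is consistent.
	 */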
7001 mutex_lock(&rdev->gpu_clock_mutex);
7002 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7003 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7004 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7005 mutex_unlock(&rdev->gpu_clock_mutex);
7006 return clock;
7007}
7008
7009static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
7010 u32 cntl_reg, u32 status_reg)
7011{
7012 int r, i;
7013 struct atom_clock_dividers dividers;
7014 uint32_t tmp;
7015
7016 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
7017 clock, false, &dividers);
7018 if (r)
7019 return r;
7020
7021 tmp = RREG32_SMC(cntl_reg);
7022 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
7023 tmp |= dividers.post_divider;
7024 WREG32_SMC(cntl_reg, tmp);
7025
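	/* wait up to ~1 second (100 * 10ms) for the new divider to take effect */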
7026 for (i = 0; i < 100; i++) {
7027 if (RREG32_SMC(status_reg) & DCLK_STATUS)
7028 break;
7029 mdelay(10);
7030 }
7031 if (i == 100)
7032 return -ETIMEDOUT;
7033
7034 return 0;
7035}
7036
7037int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7038{
7039 int r = 0;
7040
7041 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
7042 if (r)
7043 return r;
7044
7045 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
7046 return r;
7047}
7048
7049int cik_uvd_resume(struct radeon_device *rdev)
7050{
7051 uint64_t addr;
7052 uint32_t size;
7053 int r;
7054
7055 r = radeon_uvd_resume(rdev);
7056 if (r)
7057 return r;
7058
7059	/* program the VCPU memory controller bits 0-27 */
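	/* The VCPU address space is laid out as firmware image, then stack,
	 * then heap; offsets and sizes are programmed in 8-byte units
	 * (hence the >> 3 below).
	 */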
7060 addr = rdev->uvd.gpu_addr >> 3;
7061	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
7062	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
7063 WREG32(UVD_VCPU_CACHE_SIZE0, size);
7064
7065 addr += size;
7066 size = RADEON_UVD_STACK_SIZE >> 3;
7067 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
7068 WREG32(UVD_VCPU_CACHE_SIZE1, size);
7069
7070 addr += size;
7071 size = RADEON_UVD_HEAP_SIZE >> 3;
7072 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
7073 WREG32(UVD_VCPU_CACHE_SIZE2, size);
7074
7075 /* bits 28-31 */
7076 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7077 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7078
7079 /* bits 32-39 */
7080 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7081 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7082
7083 return 0;
7084}
7085
7086static void cik_pcie_gen3_enable(struct radeon_device *rdev)
7087{
7088 struct pci_dev *root = rdev->pdev->bus->self;
7089 int bridge_pos, gpu_pos;
7090 u32 speed_cntl, mask, current_data_rate;
7091 int ret, i;
7092 u16 tmp16;
7093
7094 if (radeon_pcie_gen2 == 0)
7095 return;
7096
7097 if (rdev->flags & RADEON_IS_IGP)
7098 return;
7099
7100 if (!(rdev->flags & RADEON_IS_PCIE))
7101 return;
7102
7103 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7104 if (ret != 0)
7105 return;
7106
7107 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7108 return;
7109
7110 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7111 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7112 LC_CURRENT_DATA_RATE_SHIFT;
7113 if (mask & DRM_PCIE_SPEED_80) {
7114 if (current_data_rate == 2) {
7115 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7116 return;
7117 }
7118 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7119 } else if (mask & DRM_PCIE_SPEED_50) {
7120 if (current_data_rate == 1) {
7121 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7122 return;
7123 }
7124 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7125 }
7126
7127 bridge_pos = pci_pcie_cap(root);
7128 if (!bridge_pos)
7129 return;
7130
7131 gpu_pos = pci_pcie_cap(rdev->pdev);
7132 if (!gpu_pos)
7133 return;
7134
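	/* for gen3, retrain the link: save the bridge and GPU link control
	 * state, widen the link if the hardware allows it, then re-run
	 * equalization below.
	 */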
7135 if (mask & DRM_PCIE_SPEED_80) {
7136 /* re-try equalization if gen3 is not already enabled */
7137 if (current_data_rate != 2) {
7138 u16 bridge_cfg, gpu_cfg;
7139 u16 bridge_cfg2, gpu_cfg2;
7140 u32 max_lw, current_lw, tmp;
7141
7142 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7143 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7144
7145 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7146 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7147
7148 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7149 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7150
7151 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
7152 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7153 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7154
7155 if (current_lw < max_lw) {
7156 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7157 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7158 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7159 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7160 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7161 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7162 }
7163 }
7164
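			/* retry equalization up to 10 times: quiesce the link, redo EQ,
			 * then restore the saved LNKCTL/LNKCTL2 bits on both ends.
			 */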
7165 for (i = 0; i < 10; i++) {
7166 /* check status */
7167 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7168 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7169 break;
7170
7171 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7172 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7173
7174 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7175 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7176
7177 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7178 tmp |= LC_SET_QUIESCE;
7179 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7180
7181 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7182 tmp |= LC_REDO_EQ;
7183 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7184
7185 mdelay(100);
7186
7187 /* linkctl */
7188 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7189 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7190 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7191 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7192
7193 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7194 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7195 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7196 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7197
7198 /* linkctl2 */
7199 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7200 tmp16 &= ~((1 << 4) | (7 << 9));
7201 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7202 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7203
7204 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7205 tmp16 &= ~((1 << 4) | (7 << 9));
7206 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7207 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7208
7209 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7210 tmp &= ~LC_SET_QUIESCE;
7211 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7212 }
7213 }
7214 }
7215
7216 /* set the link speed */
7217 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7218 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7219 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7220
7221 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7222 tmp16 &= ~0xf;
7223 if (mask & DRM_PCIE_SPEED_80)
7224 tmp16 |= 3; /* gen3 */
7225 else if (mask & DRM_PCIE_SPEED_50)
7226 tmp16 |= 2; /* gen2 */
7227 else
7228 tmp16 |= 1; /* gen1 */
7229 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7230
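	/* request the speed change and wait for the hardware to acknowledge it */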
7231 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7232 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7233 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7234
7235 for (i = 0; i < rdev->usec_timeout; i++) {
7236 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7237 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7238 break;
7239 udelay(1);
7240 }
7241}
7242
7243static void cik_program_aspm(struct radeon_device *rdev)
7244{
7245 u32 data, orig;
7246 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7247 bool disable_clkreq = false;
7248
7249 if (radeon_aspm == 0)
7250 return;
7251
7252 /* XXX double check IGPs */
7253 if (rdev->flags & RADEON_IS_IGP)
7254 return;
7255
7256 if (!(rdev->flags & RADEON_IS_PCIE))
7257 return;
7258
7259 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7260 data &= ~LC_XMIT_N_FTS_MASK;
7261 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7262 if (orig != data)
7263 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7264
7265 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7266 data |= LC_GO_TO_RECOVERY;
7267 if (orig != data)
7268 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7269
7270 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
7271 data |= P_IGNORE_EDB_ERR;
7272 if (orig != data)
7273 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
7274
7275 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7276 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7277 data |= LC_PMI_TO_L1_DIS;
7278 if (!disable_l0s)
7279 data |= LC_L0S_INACTIVITY(7);
7280
7281 if (!disable_l1) {
7282 data |= LC_L1_INACTIVITY(7);
7283 data &= ~LC_PMI_TO_L1_DIS;
7284 if (orig != data)
7285 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7286
7287 if (!disable_plloff_in_l1) {
7288 bool clk_req_support;
7289
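			/* allow the PCIE PLL to power down while the link is in L1 by
			 * programming the PIF power-down states for both pad blocks.
			 */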
7290 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
7291 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7292 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7293 if (orig != data)
7294 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
7295
7296 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
7297 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7298 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7299 if (orig != data)
7300 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
7301
7302 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
7303 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7304 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7305 if (orig != data)
7306 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
7307
7308 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
7309 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7310 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7311 if (orig != data)
7312 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
7313
7314 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7315 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7316 data |= LC_DYN_LANES_PWR_STATE(3);
7317 if (orig != data)
7318 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7319
7320 if (!disable_clkreq) {
7321 struct pci_dev *root = rdev->pdev->bus->self;
7322 u32 lnkcap;
7323
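				/* only use CLKREQ-based power down if the upstream bridge
				 * advertises clock power management in its link capabilities.
				 */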
7324 clk_req_support = false;
7325 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7326 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7327 clk_req_support = true;
7328 } else {
7329 clk_req_support = false;
7330 }
7331
7332 if (clk_req_support) {
7333 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7334 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7335 if (orig != data)
7336 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7337
7338 orig = data = RREG32_SMC(THM_CLK_CNTL);
7339 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7340 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7341 if (orig != data)
7342 WREG32_SMC(THM_CLK_CNTL, data);
7343
7344 orig = data = RREG32_SMC(MISC_CLK_CTRL);
7345 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7346 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7347 if (orig != data)
7348 WREG32_SMC(MISC_CLK_CTRL, data);
7349
7350 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
7351 data &= ~BCLK_AS_XCLK;
7352 if (orig != data)
7353 WREG32_SMC(CG_CLKPIN_CNTL, data);
7354
7355 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
7356 data &= ~FORCE_BIF_REFCLK_EN;
7357 if (orig != data)
7358 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
7359
7360 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
7361 data &= ~MPLL_CLKOUT_SEL_MASK;
7362 data |= MPLL_CLKOUT_SEL(4);
7363 if (orig != data)
7364 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
7365 }
7366 }
7367 } else {
7368 if (orig != data)
7369 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7370 }
7371
7372 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
7373 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7374 if (orig != data)
7375 WREG32_PCIE_PORT(PCIE_CNTL2, data);
7376
7377 if (!disable_l0s) {
7378 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7379		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7380 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
7381 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7382 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7383 data &= ~LC_L0S_INACTIVITY_MASK;
7384 if (orig != data)
7385 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7386 }
7387 }
7388 }
7389}