/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "i915_drv.h"
#include "intel_ringbuffer.h"
#include "intel_lrc.h"

/* Haswell does have the CXT_SIZE register, however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
/* Same as Haswell, but 72064 bytes now. */
#define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)

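/*
 * Per-class engine information: a printable name prefix plus the
 * submission-mode specific (legacy ringbuffer vs. execlists) init hook
 * used to bring engines of that class up.
 */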
struct engine_class_info {
	const char *name;
	int (*init_legacy)(struct intel_engine_cs *engine);
	int (*init_execlists)(struct intel_engine_cs *engine);
};

static const struct engine_class_info intel_engine_classes[] = {
	[RENDER_CLASS] = {
		.name = "rcs",
		.init_execlists = logical_render_ring_init,
		.init_legacy = intel_init_render_ring_buffer,
	},
	[COPY_ENGINE_CLASS] = {
		.name = "bcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_blt_ring_buffer,
	},
	[VIDEO_DECODE_CLASS] = {
		.name = "vcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_bsd_ring_buffer,
	},
	[VIDEO_ENHANCEMENT_CLASS] = {
		.name = "vecs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_vebox_ring_buffer,
	},
};

struct engine_info {
	unsigned int hw_id;
	unsigned int uabi_id;
	u8 class;
	u8 instance;
	u32 mmio_base;
	unsigned irq_shift;
};

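/*
 * Per-instance engine information, indexed by enum intel_engine_id; only
 * the entries for which HAS_ENGINE() reports true on the running platform
 * are actually instantiated.
 */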
static const struct engine_info intel_engines[] = {
	[RCS] = {
		.hw_id = RCS_HW,
		.uabi_id = I915_EXEC_RENDER,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_base = RENDER_RING_BASE,
		.irq_shift = GEN8_RCS_IRQ_SHIFT,
	},
	[BCS] = {
		.hw_id = BCS_HW,
		.uabi_id = I915_EXEC_BLT,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_base = BLT_RING_BASE,
		.irq_shift = GEN8_BCS_IRQ_SHIFT,
	},
	[VCS] = {
		.hw_id = VCS_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_base = GEN6_BSD_RING_BASE,
		.irq_shift = GEN8_VCS1_IRQ_SHIFT,
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_base = GEN8_BSD2_RING_BASE,
		.irq_shift = GEN8_VCS2_IRQ_SHIFT,
	},
	[VECS] = {
		.hw_id = VECS_HW,
		.uabi_id = I915_EXEC_VEBOX,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_base = VEBOX_RING_BASE,
		.irq_shift = GEN8_VECS_IRQ_SHIFT,
	},
};

/**
 * __intel_engine_context_size() - return the size of the context for an engine
 * @dev_priv: i915 device private
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
static u32
__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
{
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case RENDER_CLASS:
		switch (INTEL_GEN(dev_priv)) {
		default:
			MISSING_CASE(INTEL_GEN(dev_priv));
		case 10:
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return i915.enable_execlists ?
			       GEN8_LR_CONTEXT_RENDER_SIZE :
			       GEN8_CXT_TOTAL_SIZE;
		case 7:
			if (IS_HASWELL(dev_priv))
				return HSW_CXT_TOTAL_SIZE;

			cxt_size = I915_READ(GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = I915_READ(CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		if (INTEL_GEN(dev_priv) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

static int
intel_engine_setup(struct drm_i915_private *dev_priv,
		   enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	const struct engine_class_info *class_info;
	struct intel_engine_cs *engine;

	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
	class_info = &intel_engine_classes[info->class];

	GEM_BUG_ON(dev_priv->engine[id]);
	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	engine->id = id;
	engine->i915 = dev_priv;
	WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
			 class_info->name, info->instance) >=
		sizeof(engine->name));
	engine->uabi_id = info->uabi_id;
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = info->mmio_base;
	engine->irq_shift = info->irq_shift;
	engine->class = info->class;
	engine->instance = info->instance;

	engine->context_size = __intel_engine_context_size(dev_priv,
							    engine->class);
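	/* No engine is expected to need more than 1 MiB (BIT(20)) for its
	 * context image; warn and clamp the size to zero if the computed
	 * value looks implausible.
	 */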
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	dev_priv->engine[id] = engine;
	return 0;
}

/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
{
	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
	const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int mask = 0;
	unsigned int i;
	int err;

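	/*
	 * The device ring_mask must describe at least one engine and must
	 * not reference engine ids beyond I915_NUM_ENGINES.
	 */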
	WARN_ON(ring_mask == 0);
	WARN_ON(ring_mask &
		GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES));

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(dev_priv, i))
			continue;

		err = intel_engine_setup(dev_priv, i);
		if (err)
			goto cleanup;

		mask |= ENGINE_MASK(i);
	}

	/*
	 * Catch failures to update intel_engines table when the new engines
	 * are added to the driver by a warning and disabling the forgotten
	 * engines.
	 */
	if (WARN_ON(mask != ring_mask))
		device_info->ring_mask = mask;

	/* We always presume we have at least RCS available for later probing */
	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
		err = -ENODEV;
		goto cleanup;
	}

	device_info->num_rings = hweight32(mask);

	return 0;

cleanup:
	for_each_engine(engine, dev_priv, id)
		kfree(engine);
	return err;
}

/**
 * intel_engines_init() - init the Engine Command Streamers
 * @dev_priv: i915 device private
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id, err_id;
	int err;

	for_each_engine(engine, dev_priv, id) {
		const struct engine_class_info *class_info =
			&intel_engine_classes[engine->class];
		int (*init)(struct intel_engine_cs *engine);

		if (i915.enable_execlists)
			init = class_info->init_execlists;
		else
			init = class_info->init_legacy;

		err = -EINVAL;
		err_id = id;

		if (GEM_WARN_ON(!init))
			goto cleanup;

		err = init(engine);
		if (err)
			goto cleanup;

		GEM_BUG_ON(!engine->submit_request);
	}

	return 0;

cleanup:
	for_each_engine(engine, dev_priv, id) {
		if (id >= err_id) {
			kfree(engine);
			dev_priv->engine[id] = NULL;
		} else {
			dev_priv->gt.cleanup_engine(engine);
		}
	}
	return err;
}

void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
{
	struct drm_i915_private *dev_priv = engine->i915;

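	/* Rewriting the breadcrumb is only safe while the engine is idle and
	 * its timeline has no request still outstanding.
	 */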
	GEM_BUG_ON(!intel_engine_is_idle(engine));
	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));

	/* Our semaphore implementation is strictly monotonic (i.e. we proceed
	 * so long as the semaphore value in the register/page is greater
	 * than the sync value), so whenever we reset the seqno,
	 * so long as we reset the tracking semaphore value to 0, it will
	 * always be before the next request's seqno. If we don't reset
	 * the semaphore value, then when the seqno moves backwards all
	 * future waits will complete instantly (causing rendering corruption).
	 */
	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
		I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
		I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
		if (HAS_VEBOX(dev_priv))
			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
	}
	if (dev_priv->semaphore) {
		struct page *page = i915_vma_first_page(dev_priv->semaphore);
		void *semaphores;

		/* Semaphores are in noncoherent memory, flush to be safe */
		semaphores = kmap_atomic(page);
		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
		       0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
		drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
				       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
		kunmap_atomic(semaphores);
	}

	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

	/* After manually advancing the seqno, fake the interrupt in case
	 * there are any waiters for that seqno.
	 */
	intel_engine_wakeup(engine);

	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
}

static void intel_engine_init_timeline(struct intel_engine_cs *engine)
{
	engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
}

/**
 * intel_engine_setup_common - setup engine state not requiring hw access
 * @engine: Engine to setup.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do not require hardware access.
 *
 * Typically done early in the submission mode specific engine setup stage.
 */
void intel_engine_setup_common(struct intel_engine_cs *engine)
{
	engine->execlist_queue = RB_ROOT;
	engine->execlist_first = NULL;

	intel_engine_init_timeline(engine);
	intel_engine_init_hangcheck(engine);
	i915_gem_batch_pool_init(engine, &engine->batch_pool);

	intel_engine_init_cmd_parser(engine);
}

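/*
 * The scratch page backs pipe-control style writes: prefer stolen memory,
 * fall back to an internal allocation, and pin the result into the global
 * GTT (PIN_GLOBAL | PIN_HIGH).
 */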
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	WARN_ON(engine->scratch);

	obj = i915_gem_object_create_stolen(engine->i915, size);
	if (!obj)
		obj = i915_gem_object_create_internal(engine->i915, size);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH);
	if (ret)
		goto err_unref;

	engine->scratch = vma;
	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
			 engine->name, i915_ggtt_offset(vma));
	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->scratch);
}

/**
 * intel_engine_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
int intel_engine_init_common(struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	int ret;

	engine->set_default_submission(engine);

	/* We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ring = engine->context_pin(engine, engine->i915->kernel_context);
	if (IS_ERR(ring))
		return PTR_ERR(ring);

	ret = intel_engine_init_breadcrumbs(engine);
	if (ret)
		goto err_unpin;

	ret = i915_gem_render_state_init(engine);
	if (ret)
		goto err_unpin;

	return 0;

err_unpin:
	engine->context_unpin(engine, engine->i915->kernel_context);
	return ret;
}

/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 * the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	intel_engine_cleanup_scratch(engine);

	i915_gem_render_state_fini(engine);
	intel_engine_fini_breadcrumbs(engine);
	intel_engine_cleanup_cmd_parser(engine);
	i915_gem_batch_pool_fini(&engine->batch_pool);

	engine->context_unpin(engine, engine->i915->kernel_context);
}

u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 acthd;

	if (INTEL_GEN(dev_priv) >= 8)
		acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
					 RING_ACTHD_UDW(engine->mmio_base));
	else if (INTEL_GEN(dev_priv) >= 4)
		acthd = I915_READ(RING_ACTHD(engine->mmio_base));
	else
		acthd = I915_READ(ACTHD);

	return acthd;
}

u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 bbaddr;

	if (INTEL_GEN(dev_priv) >= 8)
		bbaddr = I915_READ64_2x32(RING_BBADDR(engine->mmio_base),
					  RING_BBADDR_UDW(engine->mmio_base));
	else
		bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));

	return bbaddr;
}

const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
{
	switch (type) {
	case I915_CACHE_NONE: return " uncached";
	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
	case I915_CACHE_L3_LLC: return " L3+LLC";
	case I915_CACHE_WT: return " WT";
	default: return "";
	}
}

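/*
 * Read a slice/subslice-specific register by steering GEN8_MCR_SELECTOR at
 * the requested slice/subslice, holding forcewake and the uncore lock for
 * the duration of the read.
 */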
static inline uint32_t
read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
		  int subslice, i915_reg_t reg)
{
	uint32_t mcr;
	uint32_t ret;
	enum forcewake_domains fw_domains;

	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&dev_priv->uncore.lock);
	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);

	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
	/*
	 * The HW expects the slice and subslice selectors to be reset to 0
	 * after reading out the registers.
	 */
	WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	ret = I915_READ_FW(reg);

	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
	spin_unlock_irq(&dev_priv->uncore.lock);

	return ret;
}

/* NB: please notice the memset */
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(dev_priv)) {
	default:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id == RCS)
			/* HACK: Using the wrong struct member */
			instdone->slice_common = I915_READ(GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = I915_READ(GEN2_INSTDONE);
		break;
	}
}

static int wa_add(struct drm_i915_private *dev_priv,
		  i915_reg_t addr,
		  const u32 mask, const u32 val)
{
	const u32 idx = dev_priv->workarounds.count;

	if (WARN_ON(idx >= I915_MAX_WA_REGS))
		return -ENOSPC;

	dev_priv->workarounds.reg[idx].addr = addr;
	dev_priv->workarounds.reg[idx].value = val;
	dev_priv->workarounds.reg[idx].mask = mask;

	dev_priv->workarounds.count++;

	return 0;
}

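/*
 * Helpers for building the workaround list: WA_REG() appends a single
 * register write via wa_add() and returns early on -ENOSPC, while the
 * *_MASKED variants use the masked-register encoding (write-enable mask
 * in the upper 16 bits).
 */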
#define WA_REG(addr, mask, val) do { \
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
		if (r) \
			return r; \
	} while (0)

#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))

#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)

static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
				 i915_reg_t reg)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_workarounds *wa = &dev_priv->workarounds;
	const uint32_t index = wa->hw_whitelist_count[engine->id];

	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
		return -EINVAL;

	WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
		 i915_mmio_reg_offset(reg));
	wa->hw_whitelist_count[engine->id]++;

	return 0;
}

static int gen8_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 * polygons in the same 8x4 pixel/sample area to be processed without
	 * stalling waiting for the earlier ones to write to Hierarchical Z
	 * buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}

static int bdw_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

	return 0;
}

static int chv_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen8_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);

	return 0;
}

static int gen9_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
			   ECOCHK_DIS_TLB);

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
				  GEN9_DG_MIRROR_FIX_ENABLE);

	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
				  GEN9_RHWO_OPTIMIZATION_DISABLE);
		/*
		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
		 * but we do that in per ctx batchbuffer as there is an issue
		 * with this register not getting restored on ctx restore
		 */
	}

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaDisableMaskBasedCammingInRCC:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
				  PIXEL_MASK_CAMMING_DISABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   BDW_DISABLE_HDC_INVALIDATION);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(dev_priv) ||
	    IS_KABYLAKE(dev_priv) ||
	    IS_COFFEELAKE(dev_priv) ||
	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
				    GEN8_LQSC_FLUSH_COHERENT_LINES));

	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
	if (ret)
		return ret;

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl */
	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
	if (ret)
		return ret;

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
	if (ret)
		return ret;

	return 0;
}

static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return 0;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));

	return 0;
}

static int skl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/*
	 * Actual WA is to disable per-context preemption granularity control
	 * until D0, which is the default case, so this is equivalent to
	 * !WaDisablePerCtxtPreemptionGranularityControl:skl
	 */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));

	/* WaEnableGapsTsvCreditFix:skl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableGafsUnitClkGating:skl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaDisableLSQCROPERFforOCL:skl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return skl_tune_iz_hashing(engine);
}

static int bxt_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaStoreMultiplePTEenable:bxt */
	/* This is a requirement according to Hardware specification */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);

	/* WaSetClckGatingDisableMedia:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
	}

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaDisablePooledEuLoadBalancingFix:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
		WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
				  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
	}

	/* WaDisableSbeCacheDispatchPortSharing:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
		WA_SET_BIT_MASKED(
			GEN7_HALF_SLICE_CHICKEN1,
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
	}

	/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
	/* WaDisableLSQCROPERFforOCL:bxt */
	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
		ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
		if (ret)
			return ret;

		ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
		if (ret)
			return ret;
	}

	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
		I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
					   L3_HIGH_PRIO_CREDITS(2));

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaInPlaceDecompressionHang:bxt */
	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	return 0;
}

static int kbl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaEnableGapsTsvCreditFix:kbl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
		WA_SET_BIT(GAMT_CHKN_BIT_REG,
			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
				  HDC_FENCE_DEST_SLM_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableGafsUnitClkGating:kbl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(
		GEN7_HALF_SLICE_CHICKEN1,
		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaDisableLSQCROPERFforOCL:kbl */
	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
	if (ret)
		return ret;

	return 0;
}

static int glk_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	return 0;
}

static int cfl_init_workarounds(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int ret;

	ret = gen9_init_workarounds(engine);
	if (ret)
		return ret;

	/* WaEnableGapsTsvCreditFix:cfl */
	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
				   GEN9_GAPS_TSV_CREDIT_DISABLE));

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableGafsUnitClkGating:cfl */
	WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(
		GEN7_HALF_SLICE_CHICKEN1,
		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	return 0;
}

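/*
 * Top-level workaround setup for the render engine: pick the platform
 * specific routine and report how many register writes were collected.
 */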
int init_workarounds_ring(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	int err;

	WARN_ON(engine->id != RCS);

	dev_priv->workarounds.count = 0;
	dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;

	if (IS_BROADWELL(dev_priv))
		err = bdw_init_workarounds(engine);
	else if (IS_CHERRYVIEW(dev_priv))
		err = chv_init_workarounds(engine);
	else if (IS_SKYLAKE(dev_priv))
		err = skl_init_workarounds(engine);
	else if (IS_BROXTON(dev_priv))
		err = bxt_init_workarounds(engine);
	else if (IS_KABYLAKE(dev_priv))
		err = kbl_init_workarounds(engine);
	else if (IS_GEMINILAKE(dev_priv))
		err = glk_init_workarounds(engine);
	else if (IS_COFFEELAKE(dev_priv))
		err = cfl_init_workarounds(engine);
	else
		err = 0;
	if (err)
		return err;

	DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
			 engine->name, dev_priv->workarounds.count);
	return 0;
}

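/*
 * Emit the accumulated workaround register writes into the request as one
 * MI_LOAD_REGISTER_IMM block, bracketed by flushes so the writes land
 * before any following commands execute.
 */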
int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
	struct i915_workarounds *w = &req->i915->workarounds;
	u32 *cs;
	int ret, i;

	if (w->count == 0)
		return 0;

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(req, (w->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
	for (i = 0; i < w->count; i++) {
		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
		*cs++ = w->reg[i].value;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(req, cs);

	ret = req->engine->emit_flush(req, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

static bool ring_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	bool idle = true;

	intel_runtime_pm_get(dev_priv);

	/* First check that no commands are left in the ring */
	if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
	    (I915_READ_TAIL(engine) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
		idle = false;

	intel_runtime_pm_put(dev_priv);

	return idle;
}

/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* More white lies, if wedged, hw state is inconsistent */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	/* Any inflight/incomplete requests? */
	if (!i915_seqno_passed(intel_engine_get_seqno(engine),
			       intel_engine_last_submit(engine)))
		return false;

	if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
		return true;

	/* Interrupt/tasklet pending? */
	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
		return false;

	/* Both ports drained, no more ELSP submission? */
	if (port_request(&engine->execlist_port[0]))
		return false;

	/* Ring stopped? */
	if (!ring_is_idle(engine))
		return false;

	return true;
}

bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (READ_ONCE(dev_priv->gt.active_requests))
		return false;

	/* If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
	 */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	for_each_engine(engine, dev_priv, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

void intel_engines_reset_default_submission(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		engine->set_default_submission(engine);
}

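/*
 * Called when the GPU goes idle: disarm breadcrumb interrupts, release each
 * engine's batch pool and make sure no execlists tasklet is left running.
 */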
void intel_engines_mark_idle(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id) {
		intel_engine_disarm_breadcrumbs(engine);
		i915_gem_batch_pool_fini(&engine->batch_pool);
		tasklet_kill(&engine->irq_tasklet);
		engine->no_priolist = false;
	}
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_engine.c"
#endif