/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "../i915_selftest.h"

#include "mock_context.h"
#include "mock_drm.h"

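/*
 * Fixture shared by the hang tests: a "hardware status page" (hws) that the
 * hanging batch writes its seqno into, and a batch object that spins forever
 * by branching back to its own start (see emit_recurse_batch()).
 */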
struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	u32 *seqno;
	u32 *batch;
};

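/*
 * Allocate and map the two internal objects: the hws page is kept LLC-cached
 * and mapped write-back, then poisoned with 0xff, while the batch is mapped
 * write-combined on !LLC platforms so CPU writes are visible to the GPU.
 */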
static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws))
		return PTR_ERR(h->hws);

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
	return err;
}

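/*
 * Each request writes its breadcrumb into a distinct dword of the hws page,
 * indexed by the request's fence context, so hangs on different contexts or
 * engines do not overwrite each other's slot.
 */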
static u64 hws_address(const struct i915_vma *hws,
		       const struct drm_i915_gem_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}

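/*
 * Write the hanging payload: store the request's seqno into its hws slot,
 * then MI_BATCH_BUFFER_START back to the start of this very batch. The GPU
 * spins in that loop until the batch is rewritten to MI_BATCH_BUFFER_END or
 * the engine is reset. The opcode encodings differ per generation, hence the
 * INTEL_GEN() ladder below.
 */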
static int emit_recurse_batch(struct hang *h,
			      struct drm_i915_gem_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
	if (err)
		goto unpin_hws;

	err = i915_switch_context(rq);
	if (err)
		goto unpin_hws;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}

	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
	i915_gem_chipset_flush(h->i915);

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}

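/*
 * Allocate a request on @engine for @ctx and fill it with the recursing
 * batch. If the previous batch object is still busy on the GPU, a fresh one
 * is allocated first so we do not overwrite a batch that is still executing.
 */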
static struct drm_i915_gem_request *
hang_create_request(struct hang *h,
		    struct intel_engine_cs *engine,
		    struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *rq;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return rq;

	err = emit_recurse_batch(h, rq);
	if (err) {
		__i915_add_request(rq, false);
		return ERR_PTR(err);
	}

	return rq;
}

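/* Read back the seqno the hanging batch wrote into its hws slot, if any. */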
static u32 hws_seqno(const struct hang *h,
		     const struct drm_i915_gem_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}

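/*
 * Terminate any outstanding spinner by rewriting the batch to
 * MI_BATCH_BUFFER_END, flush the write, release both objects and then wait
 * for the GPU to idle so the next subtest starts from a clean slate.
 */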
static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(h->i915);

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_gem_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		__i915_add_request(rq, true);

		timeout = i915_wait_request(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

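/*
 * Take exclusive ownership of the reset machinery: claim the global
 * I915_RESET_BACKOFF bit and every per-engine I915_RESET_ENGINE bit, waiting
 * out any reset already in flight, so that (for example) background hangcheck
 * cannot trigger a competing reset while a subtest pokes at the GPU.
 */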
static void global_reset_lock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
		wait_event(i915->gpu_error.reset_queue,
			   !test_bit(I915_RESET_BACKOFF,
				     &i915->gpu_error.flags));

	for_each_engine(engine, i915, id) {
		while (test_and_set_bit(I915_RESET_ENGINE + id,
					&i915->gpu_error.flags))
			wait_on_bit(&i915->gpu_error.flags,
				    I915_RESET_ENGINE + id,
				    TASK_UNINTERRUPTIBLE);
	}
}

static void global_reset_unlock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);
}

static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	global_reset_lock(i915);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915, I915_RESET_QUIET);

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

static int igt_reset_engine(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int reset_count, reset_engine_count;
	int err = 0;

	/*
	 * Check that we can issue an engine reset on each engine and that
	 * it is not escalated to a full GPU reset.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		err = i915_reset_engine(engine, I915_RESET_QUIET);
		if (err) {
			pr_err("i915_reset_engine failed\n");
			break;
		}

		if (i915_reset_count(&i915->gpu_error) != reset_count) {
			pr_err("Full GPU reset recorded! (engine reset expected)\n");
			err = -EINVAL;
			break;
		}

		if (i915_reset_engine_count(&i915->gpu_error, engine) ==
		    reset_engine_count) {
			pr_err("No %s engine reset recorded!\n", engine->name);
			err = -EINVAL;
			break;
		}

		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

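/*
 * Keep an engine busy from a kthread: ping-pong between two contexts,
 * continually submitting a fresh request and waiting on the previous one,
 * until kthread_stop() is called. Used as the "innocent" background load
 * while another engine is being reset.
 */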
static int active_engine(void *data)
{
	struct intel_engine_cs *engine = data;
	struct drm_i915_gem_request *rq[2] = {};
	struct i915_gem_context *ctx[2];
	struct drm_file *file;
	unsigned long count = 0;
	int err = 0;

	file = mock_file(engine->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[0] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[0])) {
		err = PTR_ERR(ctx[0]);
		goto err_file;
	}

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[1] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[1])) {
		err = PTR_ERR(ctx[1]);
		i915_gem_context_put(ctx[0]);
		goto err_file;
	}

	while (!kthread_should_stop()) {
		unsigned int idx = count++ & 1;
		struct drm_i915_gem_request *old = rq[idx];
		struct drm_i915_gem_request *new;

		mutex_lock(&engine->i915->drm.struct_mutex);
		new = i915_gem_request_alloc(engine, ctx[idx]);
		if (IS_ERR(new)) {
			mutex_unlock(&engine->i915->drm.struct_mutex);
			err = PTR_ERR(new);
			break;
		}

		rq[idx] = i915_gem_request_get(new);
		i915_add_request(new);
		mutex_unlock(&engine->i915->drm.struct_mutex);

		if (old) {
			i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
			i915_gem_request_put(old);
		}
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++)
		i915_gem_request_put(rq[count]);

err_file:
	mock_file_free(engine->i915, file);
	return err;
}

static int igt_reset_active_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine, *active;
	enum intel_engine_id id, tmp;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		struct task_struct *threads[I915_NUM_ENGINES];
		unsigned long resets[I915_NUM_ENGINES];
		unsigned long global = i915_reset_count(&i915->gpu_error);
		IGT_TIMEOUT(end_time);

		memset(threads, 0, sizeof(threads));
		for_each_engine(active, i915, tmp) {
			struct task_struct *tsk;

			if (active == engine)
				continue;

			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
							      active);

			tsk = kthread_run(active_engine, active,
					  "igt/%s", active->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp] = tsk;
			get_task_struct(tsk);
		}

		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		do {
			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine(%s) failed, err=%d\n",
				       engine->name, err);
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);

unwind:
		for_each_engine(active, i915, tmp) {
			int ret;

			if (!threads[tmp])
				continue;

			ret = kthread_stop(threads[tmp]);
			if (ret) {
				pr_err("kthread for active engine %s failed, err=%d\n",
				       active->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp]);

			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
								   active)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       active->name,
				       i915_reset_engine_count(&i915->gpu_error,
							       active) - resets[tmp]);
				err = -EIO;
			}
		}

		if (global != i915_reset_count(&i915->gpu_error)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(&i915->gpu_error) - global);
			err = -EIO;
		}

		if (err)
			break;

		cond_resched();
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

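/*
 * Pretend hangcheck has declared the engine hung on @rq: mark the engine as
 * stalled at its current seqno, raise I915_RESET_HANDOFF and wake the global
 * error wait queue, mimicking the real hangcheck worker. Returns the reset
 * count sampled beforehand so callers can verify a reset is then recorded.
 */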
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}

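/*
 * Wait (up to ~1s) for the spinner to report its seqno in the hws page,
 * i.e. proof that the hanging batch has actually started executing on the
 * GPU. Returns false if the batch never began, in which case the test bails.
 */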
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	/* Check that we detect a stuck waiter and issue a reset */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("Failed to start request %x, at %x\n",
		       rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p);

		i915_reset(i915, 0);
		i915_gem_set_wedged(i915);

		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(rq);

	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_gem_request_get(prev);
		__i915_add_request(prev, true);

		count = 0;
		do {
			struct drm_i915_gem_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h,
						 engine,
						 i915->kernel_context);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_gem_request_get(rq);
			__i915_add_request(rq, true);

			if (!wait_for_hang(&h, prev)) {
				struct drm_printer p = drm_info_printer(i915->drm.dev);

				pr_err("Failed to start request %x, at %x\n",
				       prev->fence.seqno, hws_seqno(&h, prev));
				intel_engine_dump(rq->engine, &p);

				i915_gem_request_put(rq);
				i915_gem_request_put(prev);

				i915_reset(i915, 0);
				i915_gem_set_wedged(i915);

				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915, I915_RESET_QUIET);

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_gem_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		i915_gem_request_put(prev);
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_handle_error(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct hang h;
	struct drm_i915_gem_request *rq;
	struct i915_gpu_state *error;
	int err;

	/*
	 * Check that i915_handle_error() resets the hung engine and marks
	 * the guilty request with -EIO.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto err_unlock;

	rq = hang_create_request(&h, engine, i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("Failed to start request %x, at %x\n",
		       rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p);

		i915_reset(i915, 0);
		i915_gem_set_wedged(i915);

		err = -EIO;
		goto err_request;
	}

	mutex_unlock(&i915->drm.struct_mutex);

	/* Temporarily disable error capture */
	error = xchg(&i915->gpu_error.first_error, (void *)-1);

	engine->hangcheck.stalled = true;
	engine->hangcheck.seqno = intel_engine_get_seqno(engine);

	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);

	xchg(&i915->gpu_error.first_error, error);

	mutex_lock(&i915->drm.struct_mutex);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_gem_request_put(rq);
err_fini:
	hang_fini(&h);
err_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_reset_engine),
		SUBTEST(igt_reset_active_engines),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_handle_error),
	};

	if (!intel_has_gpu_reset(i915))
		return 0;

	return i915_subtests(tests, i915);
}