/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "../i915_selftest.h"

#include "mock_context.h"
#include "mock_drm.h"
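
/*
 * The hang fixture: h->obj holds a batch that stores its request's seqno
 * into a per-context slot in the h->hws page and then jumps back to its
 * own start, spinning on the GPU forever until it is reset (or until the
 * loop is overwritten with MI_BATCH_BUFFER_END).
 */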
struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	u32 *seqno;
	u32 *batch;
};

static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws))
		return PTR_ERR(h->hws);

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
	return err;
}
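
/*
 * Each context writes its seqno to its own dword slot in the HWS page
 * (indexed by fence.context), so concurrent spinners on different
 * contexts do not trample each other's breadcrumbs.
 */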
static u64 hws_address(const struct i915_vma *hws,
		       const struct drm_i915_gem_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32) * rq->fence.context);
}

static int emit_recurse_batch(struct hang *h,
			      struct drm_i915_gem_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm =
		rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	err = i915_switch_context(rq);
	if (err)
		goto unpin_hws;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}
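
	/*
	 * The batch is self-referential: store the request's seqno into the
	 * per-context HWS slot, then MI_BATCH_BUFFER_START back to the top
	 * of this same batch, so the GPU spins here until it is reset. The
	 * command encodings (and address widths) vary across generations.
	 */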
	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
	i915_gem_chipset_flush(h->i915);

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}
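
/*
 * If the previous hanging batch is still busy (i.e. the GPU is still
 * spinning inside it), we cannot rewrite it in place, so swap in a fresh
 * page for the next request instead of waiting.
 */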
179static struct drm_i915_gem_request *
180hang_create_request(struct hang *h,
181 struct intel_engine_cs *engine,
182 struct i915_gem_context *ctx)
183{
184 struct drm_i915_gem_request *rq;
185 int err;
186
187 if (i915_gem_object_is_active(h->obj)) {
188 struct drm_i915_gem_object *obj;
189 void *vaddr;
190
191 obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
192 if (IS_ERR(obj))
193 return ERR_CAST(obj);
194
195 vaddr = i915_gem_object_pin_map(obj,
196 HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
197 if (IS_ERR(vaddr)) {
198 i915_gem_object_put(obj);
199 return ERR_CAST(vaddr);
200 }
201
202 i915_gem_object_unpin_map(h->obj);
203 i915_gem_object_put(h->obj);
204
205 h->obj = obj;
206 h->batch = vaddr;
207 }
208
209 rq = i915_gem_request_alloc(engine, ctx);
210 if (IS_ERR(rq))
211 return rq;
212
213 err = emit_recurse_batch(h, rq);
214 if (err) {
215 __i915_add_request(rq, false);
216 return ERR_PTR(err);
217 }
218
219 return rq;
220}
221
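
/* Read back the seqno the spinner wrote for this request's context. */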
static u32 hws_seqno(const struct hang *h,
		     const struct drm_i915_gem_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE / sizeof(u32))]);
}
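
/*
 * Terminate the spinner by overwriting the loop with MI_BATCH_BUFFER_END
 * before releasing the objects, then drain the GPU.
 */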
static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(h->i915);

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_gem_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		__i915_add_request(rq, true);

		timeout = i915_wait_request(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}
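
/*
 * Serialise with any in-flight reset: claim I915_RESET_BACKOFF and every
 * per-engine reset bit, waiting for current owners to release them, so
 * that nothing else can start a reset while the test owns the machinery.
 */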
static void global_reset_lock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
		wait_event(i915->gpu_error.reset_queue,
			   !test_bit(I915_RESET_BACKOFF,
				     &i915->gpu_error.flags));

	for_each_engine(engine, i915, id) {
		while (test_and_set_bit(I915_RESET_ENGINE + id,
					&i915->gpu_error.flags))
			wait_on_bit(&i915->gpu_error.flags,
				    I915_RESET_ENGINE + id,
				    TASK_UNINTERRUPTIBLE);
	}
}

static void global_reset_unlock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);
}

static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	global_reset_lock(i915);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915, I915_RESET_QUIET);

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

static int igt_reset_engine(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int reset_count, reset_engine_count;
	int err = 0;

	/* Check that we can issue an engine reset without a full GPU reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		err = i915_reset_engine(engine, I915_RESET_QUIET);
		if (err) {
			pr_err("i915_reset_engine failed\n");
			break;
		}

		if (i915_reset_count(&i915->gpu_error) != reset_count) {
			pr_err("Full GPU reset recorded! (engine reset expected)\n");
			err = -EINVAL;
			break;
		}

		if (i915_reset_engine_count(&i915->gpu_error, engine) ==
		    reset_engine_count) {
			pr_err("No %s engine reset recorded!\n", engine->name);
			err = -EINVAL;
			break;
		}

		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}
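
/*
 * kthread body used to keep an engine busy in the background: alternate
 * between two contexts so there is always work queued on the engine,
 * waiting on the older request each time around, until asked to stop.
 */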
static int active_engine(void *data)
{
	struct intel_engine_cs *engine = data;
	struct drm_i915_gem_request *rq[2] = {};
	struct i915_gem_context *ctx[2];
	struct drm_file *file;
	unsigned long count = 0;
	int err = 0;

	file = mock_file(engine->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[0] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[0])) {
		err = PTR_ERR(ctx[0]);
		goto err_file;
	}

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[1] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[1])) {
		err = PTR_ERR(ctx[1]);
		i915_gem_context_put(ctx[0]);
		goto err_file;
	}

	while (!kthread_should_stop()) {
		unsigned int idx = count++ & 1;
		struct drm_i915_gem_request *old = rq[idx];
		struct drm_i915_gem_request *new;

		mutex_lock(&engine->i915->drm.struct_mutex);
		new = i915_gem_request_alloc(engine, ctx[idx]);
		if (IS_ERR(new)) {
			mutex_unlock(&engine->i915->drm.struct_mutex);
			err = PTR_ERR(new);
			break;
		}

		rq[idx] = i915_gem_request_get(new);
		i915_add_request(new);
		mutex_unlock(&engine->i915->drm.struct_mutex);

		if (old) {
			i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
			i915_gem_request_put(old);
		}
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++)
		i915_gem_request_put(rq[count]);

err_file:
	mock_file_free(engine->i915, file);
	return err;
}

static int igt_reset_active_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine, *active;
	enum intel_engine_id id, tmp;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		struct task_struct *threads[I915_NUM_ENGINES];
		unsigned long resets[I915_NUM_ENGINES];
		unsigned long global = i915_reset_count(&i915->gpu_error);
		IGT_TIMEOUT(end_time);

		memset(threads, 0, sizeof(threads));
		for_each_engine(active, i915, tmp) {
			struct task_struct *tsk;

			if (active == engine)
				continue;

			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
							      active);

			tsk = kthread_run(active_engine, active,
					  "igt/%s", active->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp] = tsk;
			get_task_struct(tsk);
		}

		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		do {
			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine(%s) failed, err=%d\n",
				       engine->name, err);
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);

unwind:
		for_each_engine(active, i915, tmp) {
			int ret;

			if (!threads[tmp])
				continue;

			ret = kthread_stop(threads[tmp]);
			if (ret) {
				pr_err("kthread for active engine %s failed, err=%d\n",
				       active->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp]);

			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
								   active)) {
				pr_err("Innocent engine %s was reset (count=%lu)\n",
				       active->name,
				       i915_reset_engine_count(&i915->gpu_error,
							       active) - resets[tmp]);
				err = -EIO;
			}
		}

		if (global != i915_reset_count(&i915->gpu_error)) {
			pr_err("Global reset (count=%lu)!\n",
			       i915_reset_count(&i915->gpu_error) - global);
			err = -EIO;
		}

		if (err)
			break;

		cond_resched();
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}
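
/*
 * Pretend that hangcheck has already declared the engine hung and hand
 * the stalled request over to the reset handler.
 */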
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}
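
/*
 * Poll the HWS slot until the spinner reports its seqno, i.e. until the
 * hanging batch has actually started executing on the GPU: first a brief
 * busy-wait, then up to a second of sleeping waits.
 */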
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	/* Check that we detect a stuck waiter and issue a reset */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("Failed to start request %x, at %x\n",
		       rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p);

		i915_reset(i915, 0);
		i915_gem_set_wedged(i915);

		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(rq);

	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_gem_request_get(prev);
		__i915_add_request(prev, true);

		count = 0;
		do {
			struct drm_i915_gem_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h,
						 engine,
						 i915->kernel_context);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_gem_request_get(rq);
			__i915_add_request(rq, true);

			if (!wait_for_hang(&h, prev)) {
				struct drm_printer p = drm_info_printer(i915->drm.dev);

				pr_err("Failed to start request %x, at %x\n",
				       prev->fence.seqno, hws_seqno(&h, prev));
				intel_engine_dump(rq->engine, &p);

				i915_gem_request_put(rq);
				i915_gem_request_put(prev);

				i915_reset(i915, 0);
				i915_gem_set_wedged(i915);

				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915, I915_RESET_QUIET);

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_gem_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		i915_gem_request_put(prev);
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_handle_error(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct hang h;
	struct drm_i915_gem_request *rq;
	struct i915_gpu_state *error;
	int err;

	/* Check that we can recover a hung engine via i915_handle_error() */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto err_unlock;

	rq = hang_create_request(&h, engine, i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("Failed to start request %x, at %x\n",
		       rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p);

		i915_reset(i915, 0);
		i915_gem_set_wedged(i915);

		err = -EIO;
		goto err_request;
	}

	mutex_unlock(&i915->drm.struct_mutex);

	/* Temporarily disable error capture */
	error = xchg(&i915->gpu_error.first_error, (void *)-1);

	engine->hangcheck.stalled = true;
	engine->hangcheck.seqno = intel_engine_get_seqno(engine);

	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);

	xchg(&i915->gpu_error.first_error, error);

	mutex_lock(&i915->drm.struct_mutex);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_gem_request_put(rq);
err_fini:
	hang_fini(&h);
err_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}
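
/*
 * Entry point for the live hangcheck/reset selftests. A runtime pm
 * wakeref is held across the whole run so the device cannot suspend
 * while we poke at it; igt_global_reset runs first so that any GPU
 * wedging left over from earlier tests can be recovered before the
 * remaining subtests run.
 */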
int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_reset_engine),
		SUBTEST(igt_reset_active_engines),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_handle_error),
	};
	int err;

	if (!intel_has_gpu_reset(i915))
		return 0;

	intel_runtime_pm_get(i915);

	err = i915_subtests(tests, i915);

	intel_runtime_pm_put(i915);

	return err;
}