Blame - drivers/gpu/drm/i915/selftests/intel_hangcheck.c - kernel/msm-5.4

blob: d4acee6730e9bc5543a4d618e340fe158f3421fe [file] [log] [blame]

Chris Wilson	496b575	2017-02-13 17:15:58 +0000	[diff] [blame^]	1	/*
				2	* Copyright © 2016 Intel Corporation
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				21	* IN THE SOFTWARE.
				22	*
				23	*/
				24
				25	#include "../i915_selftest.h"
				26
				27	struct hang {
				28	struct drm_i915_private *i915;
				29	struct drm_i915_gem_object *hws;
				30	struct drm_i915_gem_object *obj;
				31	u32 *seqno;
				32	u32 *batch;
				33	};
				34
				35	static int hang_init(struct hang h, struct drm_i915_private i915)
				36	{
				37	void *vaddr;
				38	int err;
				39
				40	memset(h, 0, sizeof(*h));
				41	h->i915 = i915;
				42
				43	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
				44	if (IS_ERR(h->hws))
				45	return PTR_ERR(h->hws);
				46
				47	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
				48	if (IS_ERR(h->obj)) {
				49	err = PTR_ERR(h->obj);
				50	goto err_hws;
				51	}
				52
				53	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
				54	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
				55	if (IS_ERR(vaddr)) {
				56	err = PTR_ERR(vaddr);
				57	goto err_obj;
				58	}
				59	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);
				60
				61	vaddr = i915_gem_object_pin_map(h->obj,
				62	HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
				63	if (IS_ERR(vaddr)) {
				64	err = PTR_ERR(vaddr);
				65	goto err_unpin_hws;
				66	}
				67	h->batch = vaddr;
				68
				69	return 0;
				70
				71	err_unpin_hws:
				72	i915_gem_object_unpin_map(h->hws);
				73	err_obj:
				74	i915_gem_object_put(h->obj);
				75	err_hws:
				76	i915_gem_object_put(h->hws);
				77	return err;
				78	}
				79
				80	static u64 hws_address(const struct i915_vma *hws,
				81	const struct drm_i915_gem_request *rq)
				82	{
				83	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
				84	}
				85
				86	static int emit_recurse_batch(struct hang *h,
				87	struct drm_i915_gem_request *rq)
				88	{
				89	struct drm_i915_private *i915 = h->i915;
				90	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
				91	struct i915_vma hws, vma;
				92	unsigned int flags;
				93	u32 *batch;
				94	int err;
				95
				96	vma = i915_vma_instance(h->obj, vm, NULL);
				97	if (IS_ERR(vma))
				98	return PTR_ERR(vma);
				99
				100	hws = i915_vma_instance(h->hws, vm, NULL);
				101	if (IS_ERR(hws))
				102	return PTR_ERR(hws);
				103
				104	err = i915_vma_pin(vma, 0, 0, PIN_USER);
				105	if (err)
				106	return err;
				107
				108	err = i915_vma_pin(hws, 0, 0, PIN_USER);
				109	if (err)
				110	goto unpin_vma;
				111
				112	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
				113	if (err)
				114	goto unpin_hws;
				115
				116	err = i915_switch_context(rq);
				117	if (err)
				118	goto unpin_hws;
				119
				120	i915_vma_move_to_active(vma, rq, 0);
				121	if (!i915_gem_object_has_active_reference(vma->obj)) {
				122	i915_gem_object_get(vma->obj);
				123	i915_gem_object_set_active_reference(vma->obj);
				124	}
				125
				126	i915_vma_move_to_active(hws, rq, 0);
				127	if (!i915_gem_object_has_active_reference(hws->obj)) {
				128	i915_gem_object_get(hws->obj);
				129	i915_gem_object_set_active_reference(hws->obj);
				130	}
				131
				132	batch = h->batch;
				133	if (INTEL_GEN(i915) >= 8) {
				134	*batch++ = MI_STORE_DWORD_IMM_GEN4;
				135	*batch++ = lower_32_bits(hws_address(hws, rq));
				136	*batch++ = upper_32_bits(hws_address(hws, rq));
				137	*batch++ = rq->fence.seqno;
				138	*batch++ = MI_BATCH_BUFFER_START \| 1 << 8 \| 1;
				139	*batch++ = lower_32_bits(vma->node.start);
				140	*batch++ = upper_32_bits(vma->node.start);
				141	} else if (INTEL_GEN(i915) >= 6) {
				142	*batch++ = MI_STORE_DWORD_IMM_GEN4;
				143	*batch++ = 0;
				144	*batch++ = lower_32_bits(hws_address(hws, rq));
				145	*batch++ = rq->fence.seqno;
				146	*batch++ = MI_BATCH_BUFFER_START \| 1 << 8;
				147	*batch++ = lower_32_bits(vma->node.start);
				148	} else if (INTEL_GEN(i915) >= 4) {
				149	*batch++ = MI_STORE_DWORD_IMM_GEN4 \| 1 << 22;
				150	*batch++ = 0;
				151	*batch++ = lower_32_bits(hws_address(hws, rq));
				152	*batch++ = rq->fence.seqno;
				153	*batch++ = MI_BATCH_BUFFER_START \| 2 << 6;
				154	*batch++ = lower_32_bits(vma->node.start);
				155	} else {
				156	*batch++ = MI_STORE_DWORD_IMM;
				157	*batch++ = lower_32_bits(hws_address(hws, rq));
				158	*batch++ = rq->fence.seqno;
				159	*batch++ = MI_BATCH_BUFFER_START \| 2 << 6 \| 1;
				160	*batch++ = lower_32_bits(vma->node.start);
				161	}
				162	batch++ = MI_BATCH_BUFFER_END; / not reached */
				163
				164	flags = 0;
				165	if (INTEL_GEN(vm->i915) <= 5)
				166	flags \|= I915_DISPATCH_SECURE;
				167
				168	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
				169
				170	unpin_hws:
				171	i915_vma_unpin(hws);
				172	unpin_vma:
				173	i915_vma_unpin(vma);
				174	return err;
				175	}
				176
				177	static struct drm_i915_gem_request *
				178	hang_create_request(struct hang *h,
				179	struct intel_engine_cs *engine,
				180	struct i915_gem_context *ctx)
				181	{
				182	struct drm_i915_gem_request *rq;
				183	int err;
				184
				185	if (i915_gem_object_is_active(h->obj)) {
				186	struct drm_i915_gem_object *obj;
				187	void *vaddr;
				188
				189	obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
				190	if (IS_ERR(obj))
				191	return ERR_CAST(obj);
				192
				193	vaddr = i915_gem_object_pin_map(obj,
				194	HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
				195	if (IS_ERR(vaddr)) {
				196	i915_gem_object_put(obj);
				197	return ERR_CAST(vaddr);
				198	}
				199
				200	i915_gem_object_unpin_map(h->obj);
				201	i915_gem_object_put(h->obj);
				202
				203	h->obj = obj;
				204	h->batch = vaddr;
				205	}
				206
				207	rq = i915_gem_request_alloc(engine, ctx);
				208	if (IS_ERR(rq))
				209	return rq;
				210
				211	err = emit_recurse_batch(h, rq);
				212	if (err) {
				213	__i915_add_request(rq, false);
				214	return ERR_PTR(err);
				215	}
				216
				217	return rq;
				218	}
				219
				220	static u32 hws_seqno(const struct hang *h,
				221	const struct drm_i915_gem_request *rq)
				222	{
				223	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
				224	}
				225
				226	static void hang_fini(struct hang *h)
				227	{
				228	*h->batch = MI_BATCH_BUFFER_END;
				229	wmb();
				230
				231	i915_gem_object_unpin_map(h->obj);
				232	i915_gem_object_put(h->obj);
				233
				234	i915_gem_object_unpin_map(h->hws);
				235	i915_gem_object_put(h->hws);
				236
				237	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
				238	i915_gem_retire_requests(h->i915);
				239	}
				240
				241	static int igt_hang_sanitycheck(void *arg)
				242	{
				243	struct drm_i915_private *i915 = arg;
				244	struct drm_i915_gem_request *rq;
				245	struct intel_engine_cs *engine;
				246	enum intel_engine_id id;
				247	struct hang h;
				248	int err;
				249
				250	/* Basic check that we can execute our hanging batch */
				251
				252	if (!igt_can_mi_store_dword_imm(i915))
				253	return 0;
				254
				255	mutex_lock(&i915->drm.struct_mutex);
				256	err = hang_init(&h, i915);
				257	if (err)
				258	goto unlock;
				259
				260	for_each_engine(engine, i915, id) {
				261	long timeout;
				262
				263	rq = hang_create_request(&h, engine, i915->kernel_context);
				264	if (IS_ERR(rq)) {
				265	err = PTR_ERR(rq);
				266	pr_err("Failed to create request for %s, err=%d\n",
				267	engine->name, err);
				268	goto fini;
				269	}
				270
				271	i915_gem_request_get(rq);
				272
				273	*h.batch = MI_BATCH_BUFFER_END;
				274	__i915_add_request(rq, true);
				275
				276	timeout = i915_wait_request(rq,
				277	I915_WAIT_LOCKED,
				278	MAX_SCHEDULE_TIMEOUT);
				279	i915_gem_request_put(rq);
				280
				281	if (timeout < 0) {
				282	err = timeout;
				283	pr_err("Wait for request failed on %s, err=%d\n",
				284	engine->name, err);
				285	goto fini;
				286	}
				287	}
				288
				289	fini:
				290	hang_fini(&h);
				291	unlock:
				292	mutex_unlock(&i915->drm.struct_mutex);
				293	return err;
				294	}
				295
				296	static int igt_global_reset(void *arg)
				297	{
				298	struct drm_i915_private *i915 = arg;
				299	unsigned int reset_count;
				300	int err = 0;
				301
				302	/* Check that we can issue a global GPU reset */
				303
				304	set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags);
				305
				306	mutex_lock(&i915->drm.struct_mutex);
				307	reset_count = i915_reset_count(&i915->gpu_error);
				308
				309	i915_reset(i915);
				310
				311	if (i915_reset_count(&i915->gpu_error) == reset_count) {
				312	pr_err("No GPU reset recorded!\n");
				313	err = -EINVAL;
				314	}
				315	mutex_unlock(&i915->drm.struct_mutex);
				316
				317	GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags));
				318	if (i915_terminally_wedged(&i915->gpu_error))
				319	err = -EIO;
				320
				321	return err;
				322	}
				323
				324	static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
				325	{
				326	u32 reset_count;
				327
				328	rq->engine->hangcheck.stalled = true;
				329	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);
				330
				331	reset_count = i915_reset_count(&rq->i915->gpu_error);
				332
				333	set_bit(I915_RESET_IN_PROGRESS, &rq->i915->gpu_error.flags);
				334	wake_up_all(&rq->i915->gpu_error.wait_queue);
				335
				336	return reset_count;
				337	}
				338
				339	static bool wait_for_hang(struct hang h, struct drm_i915_gem_request rq)
				340	{
				341	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
				342	rq->fence.seqno),
				343	10) &&
				344	wait_for(i915_seqno_passed(hws_seqno(h, rq),
				345	rq->fence.seqno),
				346	1000));
				347	}
				348
				349	static int igt_wait_reset(void *arg)
				350	{
				351	struct drm_i915_private *i915 = arg;
				352	struct drm_i915_gem_request *rq;
				353	unsigned int reset_count;
				354	struct hang h;
				355	long timeout;
				356	int err;
				357
				358	/* Check that we detect a stuck waiter and issue a reset */
				359
				360	set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags);
				361
				362	mutex_lock(&i915->drm.struct_mutex);
				363	err = hang_init(&h, i915);
				364	if (err)
				365	goto unlock;
				366
				367	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
				368	if (IS_ERR(rq)) {
				369	err = PTR_ERR(rq);
				370	goto fini;
				371	}
				372
				373	i915_gem_request_get(rq);
				374	__i915_add_request(rq, true);
				375
				376	if (!wait_for_hang(&h, rq)) {
				377	pr_err("Failed to start request %x\n", rq->fence.seqno);
				378	err = -EIO;
				379	goto out_rq;
				380	}
				381
				382	reset_count = fake_hangcheck(rq);
				383
				384	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
				385	if (timeout < 0) {
				386	pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
				387	timeout);
				388	err = timeout;
				389	goto out_rq;
				390	}
				391	GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags));
				392
				393	if (i915_reset_count(&i915->gpu_error) == reset_count) {
				394	pr_err("No GPU reset recorded!\n");
				395	err = -EINVAL;
				396	goto out_rq;
				397	}
				398
				399	out_rq:
				400	i915_gem_request_put(rq);
				401	fini:
				402	hang_fini(&h);
				403	unlock:
				404	mutex_unlock(&i915->drm.struct_mutex);
				405
				406	if (i915_terminally_wedged(&i915->gpu_error))
				407	return -EIO;
				408
				409	return err;
				410	}
				411
				412	static int igt_reset_queue(void *arg)
				413	{
				414	struct drm_i915_private *i915 = arg;
				415	struct intel_engine_cs *engine;
				416	enum intel_engine_id id;
				417	struct hang h;
				418	int err;
				419
				420	/* Check that we replay pending requests following a hang */
				421
				422	if (!igt_can_mi_store_dword_imm(i915))
				423	return 0;
				424
				425	mutex_lock(&i915->drm.struct_mutex);
				426	err = hang_init(&h, i915);
				427	if (err)
				428	goto unlock;
				429
				430	for_each_engine(engine, i915, id) {
				431	struct drm_i915_gem_request *prev;
				432	IGT_TIMEOUT(end_time);
				433	unsigned int count;
				434
				435	prev = hang_create_request(&h, engine, i915->kernel_context);
				436	if (IS_ERR(prev)) {
				437	err = PTR_ERR(prev);
				438	goto fini;
				439	}
				440
				441	i915_gem_request_get(prev);
				442	__i915_add_request(prev, true);
				443
				444	count = 0;
				445	do {
				446	struct drm_i915_gem_request *rq;
				447	unsigned int reset_count;
				448
				449	rq = hang_create_request(&h,
				450	engine,
				451	i915->kernel_context);
				452	if (IS_ERR(rq)) {
				453	err = PTR_ERR(rq);
				454	goto fini;
				455	}
				456
				457	i915_gem_request_get(rq);
				458	__i915_add_request(rq, true);
				459
				460	if (!wait_for_hang(&h, prev)) {
				461	pr_err("Failed to start request %x\n",
				462	prev->fence.seqno);
				463	i915_gem_request_put(rq);
				464	i915_gem_request_put(prev);
				465	err = -EIO;
				466	goto fini;
				467	}
				468
				469	reset_count = fake_hangcheck(prev);
				470
				471	i915_reset(i915);
				472
				473	GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS,
				474	&i915->gpu_error.flags));
				475	if (prev->fence.error != -EIO) {
				476	pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				477	prev->fence.error);
				478	i915_gem_request_put(rq);
				479	i915_gem_request_put(prev);
				480	err = -EINVAL;
				481	goto fini;
				482	}
				483
				484	if (rq->fence.error) {
				485	pr_err("Fence error status not zero [%d] after unrelated reset\n",
				486	rq->fence.error);
				487	i915_gem_request_put(rq);
				488	i915_gem_request_put(prev);
				489	err = -EINVAL;
				490	goto fini;
				491	}
				492
				493	if (i915_reset_count(&i915->gpu_error) == reset_count) {
				494	pr_err("No GPU reset recorded!\n");
				495	i915_gem_request_put(rq);
				496	i915_gem_request_put(prev);
				497	err = -EINVAL;
				498	goto fini;
				499	}
				500
				501	i915_gem_request_put(prev);
				502	prev = rq;
				503	count++;
				504	} while (time_before(jiffies, end_time));
				505	pr_info("%s: Completed %d resets\n", engine->name, count);
				506
				507	*h.batch = MI_BATCH_BUFFER_END;
				508	wmb();
				509
				510	i915_gem_request_put(prev);
				511	}
				512
				513	fini:
				514	hang_fini(&h);
				515	unlock:
				516	mutex_unlock(&i915->drm.struct_mutex);
				517
				518	if (i915_terminally_wedged(&i915->gpu_error))
				519	return -EIO;
				520
				521	return err;
				522	}
				523
				524	int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
				525	{
				526	static const struct i915_subtest tests[] = {
				527	SUBTEST(igt_hang_sanitycheck),
				528	SUBTEST(igt_global_reset),
				529	SUBTEST(igt_wait_reset),
				530	SUBTEST(igt_reset_queue),
				531	};
				532
				533	if (!intel_has_gpu_reset(i915))
				534	return 0;
				535
				536	return i915_subtests(tests, i915);
				537	}