Blame - drivers/gpu/drm/i915/intel_breadcrumbs.c - kernel/msm-5.4

blob: fa1e957f74e37c25cc7e8a7cbbb6c8ef2f241588 [file] [log] [blame]

Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	1	/*
				2	* Copyright © 2015 Intel Corporation
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				21	* IN THE SOFTWARE.
				22	*
				23	*/
				24
Chris Wilson	c81d461	2016-07-01 17:23:25 +0100	[diff] [blame^]	25	#include <linux/kthread.h>
				26
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	27	#include "i915_drv.h"
				28
				29	static void intel_breadcrumbs_fake_irq(unsigned long data)
				30	{
				31	struct intel_engine_cs engine = (struct intel_engine_cs )data;
				32
				33	/*
				34	* The timer persists in case we cannot enable interrupts,
				35	* or if we have previously seen seqno/interrupt incoherency
				36	* ("missed interrupt" syndrome). Here the worker will wake up
				37	* every jiffie in order to kick the oldest waiter to do the
				38	* coherent seqno check.
				39	*/
				40	rcu_read_lock();
				41	if (intel_engine_wakeup(engine))
				42	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
				43	rcu_read_unlock();
				44	}
				45
				46	static void irq_enable(struct intel_engine_cs *engine)
				47	{
Chris Wilson	3d5564e	2016-07-01 17:23:23 +0100	[diff] [blame]	48	/* Enabling the IRQ may miss the generation of the interrupt, but
				49	* we still need to force the barrier before reading the seqno,
				50	* just in case.
				51	*/
				52	engine->irq_posted = true;
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	53	WARN_ON(!engine->irq_get(engine));
				54	}
				55
				56	static void irq_disable(struct intel_engine_cs *engine)
				57	{
				58	engine->irq_put(engine);
Chris Wilson	3d5564e	2016-07-01 17:23:23 +0100	[diff] [blame]	59	engine->irq_posted = false;
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	60	}
				61
				62	static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
				63	{
				64	struct intel_engine_cs *engine =
				65	container_of(b, struct intel_engine_cs, breadcrumbs);
				66	struct drm_i915_private *i915 = engine->i915;
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	67
				68	assert_spin_locked(&b->lock);
				69	if (b->rpm_wakelock)
				70	return false;
				71
				72	/* Since we are waiting on a request, the GPU should be busy
				73	* and should have its own rpm reference. For completeness,
				74	* record an rpm reference for ourselves to cover the
				75	* interrupt we unmask.
				76	*/
				77	intel_runtime_pm_get_noresume(i915);
				78	b->rpm_wakelock = true;
				79
				80	/* No interrupts? Kick the waiter every jiffie! */
				81	if (intel_irqs_enabled(i915)) {
Chris Wilson	3d5564e	2016-07-01 17:23:23 +0100	[diff] [blame]	82	if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	83	irq_enable(engine);
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	84	b->irq_enabled = true;
				85	}
				86
				87	if (!b->irq_enabled \|\|
				88	test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
				89	mod_timer(&b->fake_irq, jiffies + 1);
				90
Chris Wilson	3d5564e	2016-07-01 17:23:23 +0100	[diff] [blame]	91	return engine->irq_posted;
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	92	}
				93
				94	static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
				95	{
				96	struct intel_engine_cs *engine =
				97	container_of(b, struct intel_engine_cs, breadcrumbs);
				98
				99	assert_spin_locked(&b->lock);
				100	if (!b->rpm_wakelock)
				101	return;
				102
				103	if (b->irq_enabled) {
				104	irq_disable(engine);
				105	b->irq_enabled = false;
				106	}
				107
				108	intel_runtime_pm_put(engine->i915);
				109	b->rpm_wakelock = false;
				110	}
				111
				112	static inline struct intel_wait to_wait(struct rb_node node)
				113	{
				114	return container_of(node, struct intel_wait, node);
				115	}
				116
				117	static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
				118	struct intel_wait *wait)
				119	{
				120	assert_spin_locked(&b->lock);
				121
				122	/* This request is completed, so remove it from the tree, mark it as
				123	* complete, and then wake up the associated task.
				124	*/
				125	rb_erase(&wait->node, &b->waiters);
				126	RB_CLEAR_NODE(&wait->node);
				127
				128	wake_up_process(wait->tsk); /* implicit smp_wmb() */
				129	}
				130
				131	static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
				132	struct intel_wait *wait)
				133	{
				134	struct intel_breadcrumbs *b = &engine->breadcrumbs;
				135	struct rb_node *p, parent, *completed;
				136	bool first;
				137	u32 seqno;
				138
				139	/* Insert the request into the retirement ordered list
				140	* of waiters by walking the rbtree. If we are the oldest
				141	* seqno in the tree (the first to be retired), then
				142	* set ourselves as the bottom-half.
				143	*
				144	* As we descend the tree, prune completed branches since we hold the
				145	* spinlock we know that the first_waiter must be delayed and can
				146	* reduce some of the sequential wake up latency if we take action
				147	* ourselves and wake up the completed tasks in parallel. Also, by
				148	* removing stale elements in the tree, we may be able to reduce the
				149	* ping-pong between the old bottom-half and ourselves as first-waiter.
				150	*/
				151	first = true;
				152	parent = NULL;
				153	completed = NULL;
Chris Wilson	1b7744e	2016-07-01 17:23:17 +0100	[diff] [blame]	154	seqno = intel_engine_get_seqno(engine);
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	155
				156	/* If the request completed before we managed to grab the spinlock,
				157	* return now before adding ourselves to the rbtree. We let the
				158	* current bottom-half handle any pending wakeups and instead
				159	* try and get out of the way quickly.
				160	*/
				161	if (i915_seqno_passed(seqno, wait->seqno)) {
				162	RB_CLEAR_NODE(&wait->node);
				163	return first;
				164	}
				165
				166	p = &b->waiters.rb_node;
				167	while (*p) {
				168	parent = *p;
				169	if (wait->seqno == to_wait(parent)->seqno) {
				170	/* We have multiple waiters on the same seqno, select
				171	* the highest priority task (that with the smallest
				172	* task->prio) to serve as the bottom-half for this
				173	* group.
				174	*/
				175	if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
				176	p = &parent->rb_right;
				177	first = false;
				178	} else {
				179	p = &parent->rb_left;
				180	}
				181	} else if (i915_seqno_passed(wait->seqno,
				182	to_wait(parent)->seqno)) {
				183	p = &parent->rb_right;
				184	if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
				185	completed = parent;
				186	else
				187	first = false;
				188	} else {
				189	p = &parent->rb_left;
				190	}
				191	}
				192	rb_link_node(&wait->node, parent, p);
				193	rb_insert_color(&wait->node, &b->waiters);
				194	GEM_BUG_ON(!first && !b->tasklet);
				195
				196	if (completed) {
				197	struct rb_node *next = rb_next(completed);
				198
				199	GEM_BUG_ON(!next && !first);
				200	if (next && next != &wait->node) {
				201	GEM_BUG_ON(first);
				202	b->first_wait = to_wait(next);
				203	smp_store_mb(b->tasklet, b->first_wait->tsk);
				204	/* As there is a delay between reading the current
				205	* seqno, processing the completed tasks and selecting
				206	* the next waiter, we may have missed the interrupt
				207	* and so need for the next bottom-half to wakeup.
				208	*
				209	* Also as we enable the IRQ, we may miss the
				210	* interrupt for that seqno, so we have to wake up
				211	* the next bottom-half in order to do a coherent check
				212	* in case the seqno passed.
				213	*/
				214	__intel_breadcrumbs_enable_irq(b);
Chris Wilson	3d5564e	2016-07-01 17:23:23 +0100	[diff] [blame]	215	if (READ_ONCE(engine->irq_posted))
				216	wake_up_process(to_wait(next)->tsk);
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	217	}
				218
				219	do {
				220	struct intel_wait *crumb = to_wait(completed);
				221	completed = rb_prev(completed);
				222	__intel_breadcrumbs_finish(b, crumb);
				223	} while (completed);
				224	}
				225
				226	if (first) {
				227	GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
				228	b->first_wait = wait;
				229	smp_store_mb(b->tasklet, wait->tsk);
				230	first = __intel_breadcrumbs_enable_irq(b);
				231	}
				232	GEM_BUG_ON(!b->tasklet);
				233	GEM_BUG_ON(!b->first_wait);
				234	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
				235
				236	return first;
				237	}
				238
				239	bool intel_engine_add_wait(struct intel_engine_cs *engine,
				240	struct intel_wait *wait)
				241	{
				242	struct intel_breadcrumbs *b = &engine->breadcrumbs;
				243	bool first;
				244
				245	spin_lock(&b->lock);
				246	first = __intel_engine_add_wait(engine, wait);
				247	spin_unlock(&b->lock);
				248
				249	return first;
				250	}
				251
				252	void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
				253	{
				254	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
				255	}
				256
				257	static inline bool chain_wakeup(struct rb_node *rb, int priority)
				258	{
				259	return rb && to_wait(rb)->tsk->prio <= priority;
				260	}
				261
Chris Wilson	c81d461	2016-07-01 17:23:25 +0100	[diff] [blame^]	262	static inline int wakeup_priority(struct intel_breadcrumbs *b,
				263	struct task_struct *tsk)
				264	{
				265	if (tsk == b->signaler)
				266	return INT_MIN;
				267	else
				268	return tsk->prio;
				269	}
				270
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	271	void intel_engine_remove_wait(struct intel_engine_cs *engine,
				272	struct intel_wait *wait)
				273	{
				274	struct intel_breadcrumbs *b = &engine->breadcrumbs;
				275
				276	/* Quick check to see if this waiter was already decoupled from
				277	* the tree by the bottom-half to avoid contention on the spinlock
				278	* by the herd.
				279	*/
				280	if (RB_EMPTY_NODE(&wait->node))
				281	return;
				282
				283	spin_lock(&b->lock);
				284
				285	if (RB_EMPTY_NODE(&wait->node))
				286	goto out_unlock;
				287
				288	if (b->first_wait == wait) {
Chris Wilson	c81d461	2016-07-01 17:23:25 +0100	[diff] [blame^]	289	const int priority = wakeup_priority(b, wait->tsk);
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	290	struct rb_node *next;
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	291
				292	GEM_BUG_ON(b->tasklet != wait->tsk);
				293
				294	/* We are the current bottom-half. Find the next candidate,
				295	* the first waiter in the queue on the remaining oldest
				296	* request. As multiple seqnos may complete in the time it
				297	* takes us to wake up and find the next waiter, we have to
				298	* wake up that waiter for it to perform its own coherent
				299	* completion check.
				300	*/
				301	next = rb_next(&wait->node);
				302	if (chain_wakeup(next, priority)) {
				303	/* If the next waiter is already complete,
				304	* wake it up and continue onto the next waiter. So
				305	* if have a small herd, they will wake up in parallel
				306	* rather than sequentially, which should reduce
				307	* the overall latency in waking all the completed
				308	* clients.
				309	*
				310	* However, waking up a chain adds extra latency to
				311	* the first_waiter. This is undesirable if that
				312	* waiter is a high priority task.
				313	*/
Chris Wilson	1b7744e	2016-07-01 17:23:17 +0100	[diff] [blame]	314	u32 seqno = intel_engine_get_seqno(engine);
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	315
				316	while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
				317	struct rb_node *n = rb_next(next);
				318
				319	__intel_breadcrumbs_finish(b, to_wait(next));
				320	next = n;
				321	if (!chain_wakeup(next, priority))
				322	break;
				323	}
				324	}
				325
				326	if (next) {
				327	/* In our haste, we may have completed the first waiter
				328	* before we enabled the interrupt. Do so now as we
				329	* have a second waiter for a future seqno. Afterwards,
				330	* we have to wake up that waiter in case we missed
				331	* the interrupt, or if we have to handle an
				332	* exception rather than a seqno completion.
				333	*/
				334	b->first_wait = to_wait(next);
				335	smp_store_mb(b->tasklet, b->first_wait->tsk);
				336	if (b->first_wait->seqno != wait->seqno)
				337	__intel_breadcrumbs_enable_irq(b);
				338	wake_up_process(b->tasklet);
				339	} else {
				340	b->first_wait = NULL;
				341	WRITE_ONCE(b->tasklet, NULL);
				342	__intel_breadcrumbs_disable_irq(b);
				343	}
				344	} else {
				345	GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
				346	}
				347
				348	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
				349	rb_erase(&wait->node, &b->waiters);
				350
				351	out_unlock:
				352	GEM_BUG_ON(b->first_wait == wait);
				353	GEM_BUG_ON(rb_first(&b->waiters) !=
				354	(b->first_wait ? &b->first_wait->node : NULL));
				355	GEM_BUG_ON(!b->tasklet ^ RB_EMPTY_ROOT(&b->waiters));
				356	spin_unlock(&b->lock);
				357	}
				358
Chris Wilson	c81d461	2016-07-01 17:23:25 +0100	[diff] [blame^]	359	struct signal {
				360	struct rb_node node;
				361	struct intel_wait wait;
				362	struct drm_i915_gem_request *request;
				363	};
				364
				365	static bool signal_complete(struct signal *signal)
				366	{
				367	if (!signal)
				368	return false;
				369
				370	/* If another process served as the bottom-half it may have already
				371	* signalled that this wait is already completed.
				372	*/
				373	if (intel_wait_complete(&signal->wait))
				374	return true;
				375
				376	/* Carefully check if the request is complete, giving time for the
				377	* seqno to be visible or if the GPU hung.
				378	*/
				379	if (__i915_request_irq_complete(signal->request))
				380	return true;
				381
				382	return false;
				383	}
				384
				385	static struct signal to_signal(struct rb_node rb)
				386	{
				387	return container_of(rb, struct signal, node);
				388	}
				389
				390	static void signaler_set_rtpriority(void)
				391	{
				392	struct sched_param param = { .sched_priority = 1 };
				393
				394	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
				395	}
				396
				397	static int intel_breadcrumbs_signaler(void *arg)
				398	{
				399	struct intel_engine_cs *engine = arg;
				400	struct intel_breadcrumbs *b = &engine->breadcrumbs;
				401	struct signal *signal;
				402
				403	/* Install ourselves with high priority to reduce signalling latency */
				404	signaler_set_rtpriority();
				405
				406	do {
				407	set_current_state(TASK_INTERRUPTIBLE);
				408
				409	/* We are either woken up by the interrupt bottom-half,
				410	* or by a client adding a new signaller. In both cases,
				411	* the GPU seqno may have advanced beyond our oldest signal.
				412	* If it has, propagate the signal, remove the waiter and
				413	* check again with the next oldest signal. Otherwise we
				414	* need to wait for a new interrupt from the GPU or for
				415	* a new client.
				416	*/
				417	signal = READ_ONCE(b->first_signal);
				418	if (signal_complete(signal)) {
				419	/* Wake up all other completed waiters and select the
				420	* next bottom-half for the next user interrupt.
				421	*/
				422	intel_engine_remove_wait(engine, &signal->wait);
				423
				424	i915_gem_request_unreference(signal->request);
				425
				426	/* Find the next oldest signal. Note that as we have
				427	* not been holding the lock, another client may
				428	* have installed an even older signal than the one
				429	* we just completed - so double check we are still
				430	* the oldest before picking the next one.
				431	*/
				432	spin_lock(&b->lock);
				433	if (signal == b->first_signal)
				434	b->first_signal = rb_next(&signal->node);
				435	rb_erase(&signal->node, &b->signals);
				436	spin_unlock(&b->lock);
				437
				438	kfree(signal);
				439	} else {
				440	if (kthread_should_stop())
				441	break;
				442
				443	schedule();
				444	}
				445	} while (1);
				446	__set_current_state(TASK_RUNNING);
				447
				448	return 0;
				449	}
				450
				451	int intel_engine_enable_signaling(struct drm_i915_gem_request *request)
				452	{
				453	struct intel_engine_cs *engine = request->engine;
				454	struct intel_breadcrumbs *b = &engine->breadcrumbs;
				455	struct rb_node parent, *p;
				456	struct signal *signal;
				457	bool first, wakeup;
				458
				459	signal = kmalloc(sizeof(*signal), GFP_ATOMIC);
				460	if (unlikely(!signal))
				461	return -ENOMEM;
				462
				463	signal->wait.tsk = b->signaler;
				464	signal->wait.seqno = request->seqno;
				465
				466	signal->request = i915_gem_request_reference(request);
				467
				468	/* First add ourselves into the list of waiters, but register our
				469	* bottom-half as the signaller thread. As per usual, only the oldest
				470	* waiter (not just signaller) is tasked as the bottom-half waking
				471	* up all completed waiters after the user interrupt.
				472	*
				473	* If we are the oldest waiter, enable the irq (after which we
				474	* must double check that the seqno did not complete).
				475	*/
				476	wakeup = intel_engine_add_wait(engine, &signal->wait);
				477
				478	/* Now insert ourselves into the retirement ordered list of signals
				479	* on this engine. We track the oldest seqno as that will be the
				480	* first signal to complete.
				481	*/
				482	spin_lock(&b->lock);
				483	parent = NULL;
				484	first = true;
				485	p = &b->signals.rb_node;
				486	while (*p) {
				487	parent = *p;
				488	if (i915_seqno_passed(signal->wait.seqno,
				489	to_signal(parent)->wait.seqno)) {
				490	p = &parent->rb_right;
				491	first = false;
				492	} else {
				493	p = &parent->rb_left;
				494	}
				495	}
				496	rb_link_node(&signal->node, parent, p);
				497	rb_insert_color(&signal->node, &b->signals);
				498	if (first)
				499	smp_store_mb(b->first_signal, signal);
				500	spin_unlock(&b->lock);
				501
				502	if (wakeup)
				503	wake_up_process(b->signaler);
				504
				505	return 0;
				506	}
				507
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	508	int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
				509	{
				510	struct intel_breadcrumbs *b = &engine->breadcrumbs;
Chris Wilson	c81d461	2016-07-01 17:23:25 +0100	[diff] [blame^]	511	struct task_struct *tsk;
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	512
				513	spin_lock_init(&b->lock);
				514	setup_timer(&b->fake_irq,
				515	intel_breadcrumbs_fake_irq,
				516	(unsigned long)engine);
				517
Chris Wilson	c81d461	2016-07-01 17:23:25 +0100	[diff] [blame^]	518	/* Spawn a thread to provide a common bottom-half for all signals.
				519	* As this is an asynchronous interface we cannot steal the current
				520	* task for handling the bottom-half to the user interrupt, therefore
				521	* we create a thread to do the coherent seqno dance after the
				522	* interrupt and then signal the waitqueue (via the dma-buf/fence).
				523	*/
				524	tsk = kthread_run(intel_breadcrumbs_signaler, engine,
				525	"i915/signal:%d", engine->id);
				526	if (IS_ERR(tsk))
				527	return PTR_ERR(tsk);
				528
				529	b->signaler = tsk;
				530
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	531	return 0;
				532	}
				533
				534	void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
				535	{
				536	struct intel_breadcrumbs *b = &engine->breadcrumbs;
				537
Chris Wilson	c81d461	2016-07-01 17:23:25 +0100	[diff] [blame^]	538	if (!IS_ERR_OR_NULL(b->signaler))
				539	kthread_stop(b->signaler);
				540
Chris Wilson	688e6c7	2016-07-01 17:23:15 +0100	[diff] [blame]	541	del_timer_sync(&b->fake_irq);
				542	}
				543
				544	unsigned int intel_kick_waiters(struct drm_i915_private *i915)
				545	{
				546	struct intel_engine_cs *engine;
				547	unsigned int mask = 0;
				548
				549	/* To avoid the task_struct disappearing beneath us as we wake up
				550	* the process, we must first inspect the task_struct->state under the
				551	* RCU lock, i.e. as we call wake_up_process() we must be holding the
				552	* rcu_read_lock().
				553	*/
				554	rcu_read_lock();
				555	for_each_engine(engine, i915)
				556	if (unlikely(intel_engine_wakeup(engine)))
				557	mask \|= intel_engine_flag(engine);
				558	rcu_read_unlock();
				559
				560	return mask;
				561	}
Chris Wilson	c81d461	2016-07-01 17:23:25 +0100	[diff] [blame^]	562
				563	unsigned int intel_kick_signalers(struct drm_i915_private *i915)
				564	{
				565	struct intel_engine_cs *engine;
				566	unsigned int mask = 0;
				567
				568	for_each_engine(engine, i915) {
				569	if (unlikely(READ_ONCE(engine->breadcrumbs.first_signal))) {
				570	wake_up_process(engine->breadcrumbs.signaler);
				571	mask \|= intel_engine_flag(engine);
				572	}
				573	}
				574
				575	return mask;
				576	}