/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prefetch.h>
#include <linux/dma-fence-array.h>

#include "i915_drv.h"

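/* Each i915 request is exposed to the rest of the kernel as a dma_fence.
 * The callbacks below implement that interface; the remainder of this file
 * handles request allocation, submission, retirement and waiting.
 */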
static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return "i915";
}

static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
	/* Timelines are bound by eviction to a VM. However, since
	 * we only have a global seqno at the moment, we only have
	 * a single timeline. Note that each timeline will have
	 * multiple execution contexts (fence contexts) as we allow
	 * engines within a single timeline to execute in parallel.
	 */
	return "global";
}

static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_gem_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	if (i915_fence_signaled(fence))
		return false;

	intel_engine_enable_signaling(to_request(fence));
	return true;
}

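/* The dma_fence wait callback works in jiffies, whereas i915_wait_request()
 * takes a timeout in nanoseconds, so convert on the way in and back out.
 */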
static signed long i915_fence_wait(struct dma_fence *fence,
				   bool interruptible,
				   signed long timeout_jiffies)
{
	s64 timeout_ns, *timeout;
	int ret;

	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
		timeout_ns = jiffies_to_nsecs(timeout_jiffies);
		timeout = &timeout_ns;
	} else {
		timeout = NULL;
	}

	ret = i915_wait_request(to_request(fence),
				interruptible, timeout,
				NO_WAITBOOST);
	if (ret == -ETIME)
		return 0;

	if (ret < 0)
		return ret;

	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
		timeout_jiffies = nsecs_to_jiffies(timeout_ns);

	return timeout_jiffies;
}

static void i915_fence_value_str(struct dma_fence *fence, char *str, int size)
{
	snprintf(str, size, "%u", fence->seqno);
}

static void i915_fence_timeline_value_str(struct dma_fence *fence, char *str,
					  int size)
{
	snprintf(str, size, "%u",
		 intel_engine_get_seqno(to_request(fence)->engine));
}

static void i915_fence_release(struct dma_fence *fence)
{
	struct drm_i915_gem_request *req = to_request(fence);

	kmem_cache_free(req->i915->requests, req);
}

const struct dma_fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
	.fence_value_str = i915_fence_value_str,
	.timeline_value_str = i915_fence_timeline_value_str,
};

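/* Track the request on the client's per-file list so that the file's
 * outstanding work can be inspected later (e.g. for throttling).
 */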
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_private *dev_private;
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	dev_private = req->i915;
	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

void i915_gem_retire_noop(struct i915_gem_active *active,
			  struct drm_i915_gem_request *request)
{
	/* Space left intentionally blank */
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	struct i915_gem_active *active, *next;

	trace_i915_gem_request_retire(request);
	list_del(&request->link);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of the tail of the request to update the last known
	 * position of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	list_del(&request->ring_link);
	request->ring->last_retired_head = request->postfix;

	/* Walk through the active list, calling retire on each. This allows
	 * objects to track their GPU activity and mark themselves as idle
	 * when their *last* active request is completed (updating state
	 * tracking lists for eviction, active references for GEM, etc).
	 *
	 * As the ->retire() may free the node, we decouple it first and
	 * pass along the auxiliary information (to avoid dereferencing
	 * the node after the callback).
	 */
	list_for_each_entry_safe(active, next, &request->active_list, link) {
		/* In microbenchmarks, or when focusing upon time inside the
		 * kernel, we may spend an inordinate amount of time simply
		 * handling the retirement of requests and processing their
		 * callbacks. This loop itself is particularly hot due to the
		 * cache misses when jumping around the list of i915_gem_active.
		 * So we try to keep this loop as streamlined as possible and
		 * also prefetch the next i915_gem_active to try and hide
		 * the likely cache miss.
		 */
		prefetchw(next);

		INIT_LIST_HEAD(&active->link);
		RCU_INIT_POINTER(active->request, NULL);

		active->retire(active, request);
	}

	i915_gem_request_remove_from_client(request);

	if (request->previous_context) {
		if (i915.enable_execlists)
			intel_lr_context_unpin(request->previous_context,
					       request->engine);
	}

	i915_gem_context_put(request->ctx);
	i915_gem_request_put(request);
}

void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&req->i915->drm.struct_mutex);
	GEM_BUG_ON(list_empty(&req->link));

	do {
		tmp = list_first_entry(&engine->request_list,
				       typeof(*tmp), link);

		i915_gem_request_retire(tmp);
	} while (tmp != req);
}

static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
{
	struct i915_gpu_error *error = &dev_priv->gpu_error;

	if (i915_terminally_wedged(error))
		return -EIO;

	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these.
		 */
		if (!dev_priv->mm.interruptible)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

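/* Prepare to (re)start the seqno from a given value: idle every engine and
 * retire all requests, kick any breadcrumb waiters/signalers if the seqno
 * is about to wrap, then program the new value into each engine's HWS.
 */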
static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int ret;

	/* Carefully retire all requests without writing to the rings */
	for_each_engine(engine, dev_priv, id) {
		ret = intel_engine_idle(engine,
					I915_WAIT_INTERRUPTIBLE |
					I915_WAIT_LOCKED);
		if (ret)
			return ret;
	}
	i915_gem_retire_requests(dev_priv);

	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
		while (intel_kick_waiters(dev_priv) ||
		       intel_kick_signalers(dev_priv))
			yield();
	}

	/* Finally reset hw state */
	for_each_engine(engine, dev_priv, id)
		intel_engine_init_seqno(engine, seqno);

	return 0;
}

int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	if (seqno == 0)
		return -EINVAL;

	/* The HWS page needs to be set to one less than the
	 * seqno we will inject into the ring.
	 */
	ret = i915_gem_init_seqno(dev_priv, seqno - 1);
	if (ret)
		return ret;

	dev_priv->next_seqno = seqno;
	return 0;
}

static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
{
	/* reserve 0 for non-seqno */
	if (unlikely(dev_priv->next_seqno == 0)) {
		int ret;

		ret = i915_gem_init_seqno(dev_priv, 0);
		if (ret)
			return ret;

		dev_priv->next_seqno = 1;
	}

	*seqno = dev_priv->next_seqno++;
	return 0;
}

static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct drm_i915_gem_request *request =
		container_of(fence, typeof(*request), submit);

	/* Will be called from irq-context when using foreign DMA fences */

	switch (state) {
	case FENCE_COMPLETE:
		request->engine->last_submitted_seqno = request->fence.seqno;
		request->engine->submit_request(request);
		break;

	case FENCE_FREE:
		break;
	}

	return NOTIFY_DONE;
}

/**
 * i915_gem_request_alloc - allocate a request structure
 *
 * @engine: engine that we wish to issue the request on.
 * @ctx: context that the request will be associated with.
 *       This can be NULL if the request is not directly related to
 *       any specific user context, in which case this function will
 *       choose an appropriate context to use.
 *
 * Returns a pointer to the allocated request if successful,
 * or an error code if not.
 */
struct drm_i915_gem_request *
i915_gem_request_alloc(struct intel_engine_cs *engine,
		       struct i915_gem_context *ctx)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct drm_i915_gem_request *req;
	u32 seqno;
	int ret;

	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
	 * and restart.
	 */
	ret = i915_gem_check_wedge(dev_priv);
	if (ret)
		return ERR_PTR(ret);

	/* Move the oldest request to the slab-cache (if not in use!) */
	req = list_first_entry_or_null(&engine->request_list,
				       typeof(*req), link);
	if (req && i915_gem_request_completed(req))
		i915_gem_request_retire(req);

	/* Beware: Dragons be flying overhead.
	 *
	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is, the request we are writing to here may be in the process
	 * of being read by __i915_gem_active_get_rcu(). As such,
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we chase the request->engine pointer,
	 * read the request->fence.seqno and increment the reference count.
	 *
	 * The reference count is incremented atomically. If it is zero,
	 * the lookup knows the request is unallocated and complete. Otherwise,
	 * it is either still in use, or has been reallocated and reset
	 * with dma_fence_init(). This increment is safe for release as we
	 * check that the request we have a reference to matches the active
	 * request.
	 *
	 * Before we increment the refcount, we chase the request->engine
	 * pointer. We must not call kmem_cache_zalloc() or else we set
	 * that pointer to NULL and cause a crash during the lookup. If
	 * we see the request is completed (based on the value of the
	 * old engine and seqno), the lookup is complete and reports NULL.
	 * If we decide the request is not completed (new engine or seqno),
	 * then we grab a reference and double check that it is still the
	 * active request - which it won't be, and restart the lookup.
	 *
	 * Do not use kmem_cache_zalloc() here!
	 */
	req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
	if (!req)
		return ERR_PTR(-ENOMEM);

	ret = i915_gem_get_seqno(dev_priv, &seqno);
	if (ret)
		goto err;

	spin_lock_init(&req->lock);
	dma_fence_init(&req->fence,
		       &i915_fence_ops,
		       &req->lock,
		       engine->fence_context,
		       seqno);

	i915_sw_fence_init(&req->submit, submit_notify);

	INIT_LIST_HEAD(&req->active_list);
	req->i915 = dev_priv;
	req->engine = engine;
	req->ctx = i915_gem_context_get(ctx);

	/* No zalloc, must clear what we need by hand */
	req->previous_context = NULL;
	req->file_priv = NULL;
	req->batch = NULL;

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_add_request() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 */
	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;

	if (i915.enable_execlists)
		ret = intel_logical_ring_alloc_request_extras(req);
	else
		ret = intel_ring_alloc_request_extras(req);
	if (ret)
		goto err_ctx;

	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	req->head = req->ring->tail;

	return req;

err_ctx:
	i915_gem_context_put(ctx);
err:
	kmem_cache_free(dev_priv->requests, req);
	return ERR_PTR(ret);
}

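/* Order the execution of @to after @from when they run on different engines:
 * either by emitting a semaphore wait or, without semaphores, by making the
 * submit fence of @to wait upon @from (after a short optimistic spin).
 */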
static int
i915_gem_request_await_request(struct drm_i915_gem_request *to,
			       struct drm_i915_gem_request *from)
{
	int idx, ret;

	GEM_BUG_ON(to == from);

	if (to->engine == from->engine)
		return 0;

	idx = intel_engine_sync_index(from->engine, to->engine);
	if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
		return 0;

	trace_i915_gem_ring_sync_to(to, from);
	if (!i915.semaphores) {
		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
			ret = i915_sw_fence_await_dma_fence(&to->submit,
							    &from->fence, 0,
							    GFP_KERNEL);
			if (ret < 0)
				return ret;
		}
	} else {
		ret = to->engine->semaphore.sync_to(to, from);
		if (ret)
			return ret;
	}

	from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
	return 0;
}

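/* Make the request wait (asynchronously, before it is executed) upon an
 * arbitrary dma_fence: native i915 fences become inter-request waits, fence
 * arrays are decomposed into their children, and anything foreign is routed
 * through the request's submit fence.
 */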
int
i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
				 struct dma_fence *fence)
{
	struct dma_fence_array *array;
	int ret;
	int i;

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return 0;

	if (dma_fence_is_i915(fence))
		return i915_gem_request_await_request(req, to_request(fence));

	if (!dma_fence_is_array(fence)) {
		ret = i915_sw_fence_await_dma_fence(&req->submit,
						    fence, I915_FENCE_TIMEOUT,
						    GFP_KERNEL);
		return ret < 0 ? ret : 0;
	}

	/* Note that if the fence-array was created in signal-on-any mode,
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */

	array = to_dma_fence_array(fence);
	for (i = 0; i < array->num_fences; i++) {
		struct dma_fence *child = array->fences[i];

		if (dma_fence_is_i915(child))
			ret = i915_gem_request_await_request(req,
							     to_request(child));
		else
			ret = i915_sw_fence_await_dma_fence(&req->submit,
							    child,
							    I915_FENCE_TIMEOUT,
							    GFP_KERNEL);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/**
 * i915_gem_request_await_object - set this request to (async) wait upon a bo
 *
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
 * @write: whether the waiter intends to write to the object
 *         (if so, we must wait upon all outstanding readers as well)
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers. To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (pending_write_domain is set), the new
 *   request must wait for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
			      struct drm_i915_gem_object *obj,
			      bool write)
{
	struct i915_gem_active *active;
	unsigned long active_mask;
	int idx;

	if (write) {
		active_mask = i915_gem_object_get_active(obj);
		active = obj->last_read;
	} else {
		active_mask = 1;
		active = &obj->last_write;
	}

	for_each_active(active_mask, idx) {
		struct drm_i915_gem_request *request;
		int ret;

		request = i915_gem_active_peek(&active[idx],
					       &obj->base.dev->struct_mutex);
		if (!request)
			continue;

		ret = i915_gem_request_await_request(to, request);
		if (ret)
			return ret;
	}

	return 0;
}

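/* Mark the engine (and hence the GT) as busy: take a runtime-pm wakeref
 * without resuming, kick the powersave/RPS machinery and schedule the
 * retirement worker.
 */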
static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	dev_priv->gt.active_engines |= intel_engine_flag(engine);
	if (dev_priv->gt.awake)
		return;

	intel_runtime_pm_get_noresume(dev_priv);
	dev_priv->gt.awake = true;

	intel_enable_gt_powersave(dev_priv);
	i915_update_gfx_val(dev_priv);
	if (INTEL_GEN(dev_priv) >= 6)
		gen6_rps_busy(dev_priv);

	queue_delayed_work(dev_priv->wq,
			   &dev_priv->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}

/*
 * NB: This function is not allowed to fail. Doing so would mean that the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_ring *ring = request->ring;
	struct drm_i915_gem_request *prev;
	u32 request_start;
	u32 reserved_tail;
	int ret;

	trace_i915_gem_request_add(request);

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	request_start = ring->tail;
	reserved_tail = request->reserved_space;
	request->reserved_space = 0;

	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	if (flush_caches) {
		ret = engine->emit_flush(request, EMIT_FLUSH);

		/* Not allowed to fail! */
		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
	}

	/* Record the position of the start of the breadcrumb so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the ring's HEAD.
	 */
	request->postfix = ring->tail;

	/* Not allowed to fail! */
	ret = engine->emit_request(request);
	WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret);

	/* Sanity check that the reserved size was large enough. */
	ret = ring->tail - request_start;
	if (ret < 0)
		ret += ring->size;
	WARN_ONCE(ret > reserved_tail,
		  "Not enough space reserved (%d bytes) "
		  "for adding the request (%d bytes)\n",
		  reserved_tail, ret);

	/* Seal the request and mark it as pending execution. Note that
	 * we may inspect this state, without holding any locks, during
	 * hangcheck. Hence we apply the barrier to ensure that we do not
	 * see a more recent value in the hws than we are tracking.
	 */

	prev = i915_gem_active_raw(&engine->last_request,
				   &request->i915->drm.struct_mutex);
	if (prev)
		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
					     &request->submitq);

	request->emitted_jiffies = jiffies;
	request->previous_seqno = engine->last_pending_seqno;
	engine->last_pending_seqno = request->fence.seqno;
	i915_gem_active_set(&engine->last_request, request);
	list_add_tail(&request->link, &engine->request_list);
	list_add_tail(&request->ring_link, &ring->request_list);

	i915_gem_mark_busy(engine);

	local_bh_disable();
	i915_sw_fence_commit(&request->submit);
	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
}

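/* Re-attach the wait entry to the queue if it was removed when we were last
 * woken (e.g. to handle a GPU reset), so that we keep receiving reset
 * notifications whilst we sleep again.
 */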
static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	if (list_empty(&wait->task_list))
		__add_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}

static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/* Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as an indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

bool __i915_spin_request(const struct drm_i915_gem_request *req,
			 int state, unsigned long timeout_us)
{
	unsigned int cpu;

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quickly as possible.
	 * However, if it is a slow request, we want to sleep as quickly as
	 * possible. The tradeoff between waiting and sleeping is roughly the
	 * time it takes to sleep on a request, on the order of a microsecond.
	 */

	timeout_us += local_clock_us(&cpu);
	do {
		if (i915_gem_request_completed(req))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

		cpu_relax_lowlatency();
	} while (!need_resched());

	return false;
}

/**
 * i915_wait_request - wait until execution of request has finished
 * @req: the request to wait upon
 * @flags: how to wait
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 * @rps: client to charge for RPS boosting
 *
 * Note: It is of utmost importance that the passed-in seqno and reset_counter
 * values have been read by the caller in an SMP-safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns
 * the errno with the remaining time filled in the timeout argument.
 */
int i915_wait_request(struct drm_i915_gem_request *req,
		      unsigned int flags,
		      s64 *timeout,
		      struct intel_rps_client *rps)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(reset);
	struct intel_wait wait;
	unsigned long timeout_remain;
	int ret = 0;

	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif

	if (i915_gem_request_completed(req))
		return 0;

	timeout_remain = MAX_SCHEDULE_TIMEOUT;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		/* Record current time in case interrupted, or wedged */
		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
		*timeout += ktime_get_raw_ns();
	}

	trace_i915_gem_request_wait_begin(req);

	/* This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);

	/* Optimistic short spin before touching IRQs */
	if (i915_spin_request(req, state, 5))
		goto complete;

	set_current_state(state);
	if (flags & I915_WAIT_LOCKED)
		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);

	intel_wait_init(&wait, req->fence.seqno);
	if (intel_engine_add_wait(req->engine, &wait))
		/* In order to check that we haven't missed the interrupt
		 * as we enabled it, we need to kick ourselves to do a
		 * coherent check on the seqno before we sleep.
		 */
		goto wakeup;

	for (;;) {
		if (signal_pending_state(state, current)) {
			ret = -ERESTARTSYS;
			break;
		}

		timeout_remain = io_schedule_timeout(timeout_remain);
		if (timeout_remain == 0) {
			ret = -ETIME;
			break;
		}

		if (intel_wait_complete(&wait))
			break;

		set_current_state(state);

wakeup:
		/* Carefully check if the request is complete, giving time
		 * for the seqno to be visible following the interrupt.
		 * We also have to check in case we are kicked by the GPU
		 * reset in order to drop the struct_mutex.
		 */
		if (__i915_request_irq_complete(req))
			break;

		/* If the GPU is hung, and we hold the lock, reset the GPU
		 * and then check for completion. On a full reset, the engine's
		 * HW seqno will be advanced past us and we are complete.
		 * If we do a partial reset, we have to wait for the GPU to
		 * resume and update the breadcrumb.
		 *
		 * If we don't hold the mutex, we can just wait for the worker
		 * to come along and update the breadcrumb (either directly
		 * itself, or indirectly by recovering the GPU).
		 */
		if (flags & I915_WAIT_LOCKED &&
		    i915_reset_in_progress(&req->i915->gpu_error)) {
			__set_current_state(TASK_RUNNING);
			i915_reset(req->i915);
			reset_wait_queue(&req->i915->gpu_error.wait_queue,
					 &reset);
			continue;
		}

		/* Only spin if we know the GPU is processing this request */
		if (i915_spin_request(req, state, 2))
			break;
	}

	intel_engine_remove_wait(req->engine, &wait);
	if (flags & I915_WAIT_LOCKED)
		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
	__set_current_state(TASK_RUNNING);

complete:
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		*timeout -= ktime_get_raw_ns();
		if (*timeout < 0)
			*timeout = 0;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	if (IS_RPS_USER(rps) &&
	    req->fence.seqno == req->engine->last_submitted_seqno) {
		/* The GPU is now idle and this client has stalled.
		 * Since no other client has submitted a request in the
		 * meantime, assume that this client is the only one
		 * supplying work to the GPU but is unable to keep that
		 * work supplied because it is waiting. Since the GPU is
		 * then never kept fully busy, RPS autoclocking will
		 * keep the clocks relatively low, causing further delays.
		 * Compensate by giving the synchronous client credit for
		 * a waitboost next time.
		 */
		spin_lock(&req->i915->rps.client_lock);
		list_del_init(&rps->link);
		spin_unlock(&req->i915->rps.client_lock);
	}

	return ret;
}

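/* Retire completed requests on this engine in submission order; returns true
 * once the engine's request list has been completely drained.
 */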
static bool engine_retire_requests(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_request *request, *next;

	list_for_each_entry_safe(request, next, &engine->request_list, link) {
		if (!i915_gem_request_completed(request))
			return false;

		i915_gem_request_retire(request);
	}

	return true;
}

void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	unsigned int tmp;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	if (dev_priv->gt.active_engines == 0)
		return;

	GEM_BUG_ON(!dev_priv->gt.awake);

	for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp)
		if (engine_retire_requests(engine))
			dev_priv->gt.active_engines &= ~intel_engine_flag(engine);

	if (dev_priv->gt.active_engines == 0)
		queue_delayed_work(dev_priv->wq,
				   &dev_priv->gt.idle_work,
				   msecs_to_jiffies(100));
}