drm/i915: Only hold a process-local lock whilst throttling.

Avoid causing latencies in other clients by not taking the global
struct_mutex and by moving the per-client request manipulation to a local
per-client mutex. For example, this allows a compositor to schedule a
page-flip (through X) whilst an OpenGL application is monopolising the GPU.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 36f0e36..eb5dd52 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -261,7 +261,7 @@
 
 	if (dev_priv->render_ring.status_page.page_addr != NULL) {
 		seq_printf(m, "Current sequence: %d\n",
-			   i915_get_gem_seqno(dev,  &dev_priv->render_ring));
+			   dev_priv->render_ring.get_seqno(dev, &dev_priv->render_ring));
 	} else {
 		seq_printf(m, "Current sequence: hws uninitialized\n");
 	}
@@ -321,7 +321,7 @@
 		   atomic_read(&dev_priv->irq_received));
 	if (dev_priv->render_ring.status_page.page_addr != NULL) {
 		seq_printf(m, "Current sequence:    %d\n",
-			   i915_get_gem_seqno(dev,  &dev_priv->render_ring));
+			   dev_priv->render_ring.get_seqno(dev, &dev_priv->render_ring));
 	} else {
 		seq_printf(m, "Current sequence:    hws uninitialized\n");
 	}
@@ -932,7 +932,7 @@
 
 	atomic_set(&dev_priv->mm.wedged, val);
 	if (val) {
-		DRM_WAKEUP(&dev_priv->irq_queue);
+		wake_up_all(&dev_priv->irq_queue);
 		queue_work(dev_priv->wq, &dev_priv->error_work);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 048c54b..a3aea17 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2162,20 +2162,19 @@
 	return 0;
 }
 
-int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv)
+int i915_driver_open(struct drm_device *dev, struct drm_file *file)
 {
-	struct drm_i915_file_private *i915_file_priv;
+	struct drm_i915_file_private *file_priv;
 
 	DRM_DEBUG_DRIVER("\n");
-	i915_file_priv = (struct drm_i915_file_private *)
-	    kmalloc(sizeof(*i915_file_priv), GFP_KERNEL);
-
-	if (!i915_file_priv)
+	file_priv = kmalloc(sizeof(*file_priv), GFP_KERNEL);
+	if (!file_priv)
 		return -ENOMEM;
 
-	file_priv->driver_priv = i915_file_priv;
+	file->driver_priv = file_priv;
 
-	INIT_LIST_HEAD(&i915_file_priv->mm.request_list);
+	INIT_LIST_HEAD(&file_priv->mm.request_list);
+	mutex_init(&file_priv->mutex);
 
 	return 0;
 }
@@ -2218,11 +2217,12 @@
 		i915_mem_release(dev, file_priv, dev_priv->agp_heap);
 }
 
-void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
+void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
 {
-	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	kfree(i915_file_priv);
+	mutex_destroy(&file_priv->mutex);
+	kfree(file_priv);
 }
 
 struct drm_ioctl_desc i915_ioctls[] = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cbfb99d..2611e85b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -844,11 +844,13 @@
 	/** global list entry for this request */
 	struct list_head list;
 
+	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_list;
 };
 
 struct drm_i915_file_private {
+	struct mutex mutex;
 	struct {
 		struct list_head request_list;
 	} mm;
@@ -1005,9 +1007,16 @@
 int i915_gem_object_unbind(struct drm_gem_object *obj);
 void i915_gem_release_mmap(struct drm_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
-uint32_t i915_get_gem_seqno(struct drm_device *dev,
-		struct intel_ring_buffer *ring);
-bool i915_seqno_passed(uint32_t seq1, uint32_t seq2);
+
+/**
+ * Returns true if seq1 is the same as or later than seq2 (wrap-safe).
+ */
+static inline bool
+i915_seqno_passed(uint32_t seq1, uint32_t seq2)
+{
+	return (int32_t)(seq1 - seq2) >= 0;
+}
+
 int i915_gem_object_get_fence_reg(struct drm_gem_object *obj,
 				  bool interruptible);
 int i915_gem_object_put_fence_reg(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dec7bbc..9185f09 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1592,17 +1592,17 @@
 
 uint32_t
 i915_add_request(struct drm_device *dev,
-		 struct drm_file *file_priv,
+		 struct drm_file *file,
 		 struct drm_i915_gem_request *request,
 		 struct intel_ring_buffer *ring)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_file_private *i915_file_priv = NULL;
+	struct drm_i915_file_private *file_priv = NULL;
 	uint32_t seqno;
 	int was_empty;
 
-	if (file_priv != NULL)
-		i915_file_priv = file_priv->driver_priv;
+	if (file != NULL)
+		file_priv = file->driver_priv;
 
 	if (request == NULL) {
 		request = kzalloc(sizeof(*request), GFP_KERNEL);
@@ -1610,7 +1610,7 @@
 			return 0;
 	}
 
-	seqno = ring->add_request(dev, ring, file_priv, 0);
+	seqno = ring->add_request(dev, ring, 0);
 
 	request->seqno = seqno;
 	request->ring = ring;
@@ -1618,11 +1618,12 @@
 	was_empty = list_empty(&ring->request_list);
 	list_add_tail(&request->list, &ring->request_list);
 
-	if (i915_file_priv) {
+	if (file_priv) {
+		mutex_lock(&file_priv->mutex);
+		request->file_priv = file_priv;
 		list_add_tail(&request->client_list,
-			      &i915_file_priv->mm.request_list);
-	} else {
-		INIT_LIST_HEAD(&request->client_list);
+			      &file_priv->mm.request_list);
+		mutex_unlock(&file_priv->mutex);
 	}
 
 	if (!dev_priv->mm.suspended) {
@@ -1654,20 +1655,14 @@
 			I915_GEM_DOMAIN_COMMAND, flush_domains);
 }
 
-/**
- * Returns true if seq1 is later than seq2.
- */
-bool
-i915_seqno_passed(uint32_t seq1, uint32_t seq2)
+static inline void
+i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
 {
-	return (int32_t)(seq1 - seq2) >= 0;
-}
-
-uint32_t
-i915_get_gem_seqno(struct drm_device *dev,
-		   struct intel_ring_buffer *ring)
-{
-	return ring->get_gem_seqno(dev, ring);
+	if (request->file_priv) {
+		mutex_lock(&request->file_priv->mutex);
+		list_del(&request->client_list);
+		mutex_unlock(&request->file_priv->mutex);
+	}
 }
 
 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
@@ -1681,7 +1676,7 @@
 					   list);
 
 		list_del(&request->list);
-		list_del(&request->client_list);
+		i915_gem_request_remove_from_client(request);
 		kfree(request);
 	}
 
@@ -1746,7 +1741,7 @@
 	    list_empty(&ring->request_list))
 		return;
 
-	seqno = i915_get_gem_seqno(dev, ring);
+	seqno = ring->get_seqno(dev, ring);
 	while (!list_empty(&ring->request_list)) {
 		struct drm_i915_gem_request *request;
 
@@ -1760,7 +1755,7 @@
 		trace_i915_gem_request_retire(dev, request->seqno);
 
 		list_del(&request->list);
-		list_del(&request->client_list);
+		i915_gem_request_remove_from_client(request);
 		kfree(request);
 	}
 
@@ -1862,7 +1857,7 @@
 	if (atomic_read(&dev_priv->mm.wedged))
 		return -EIO;
 
-	if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
+	if (!i915_seqno_passed(ring->get_seqno(dev, ring), seqno)) {
 		if (HAS_PCH_SPLIT(dev))
 			ier = I915_READ(DEIER) | I915_READ(GTIER);
 		else
@@ -1881,12 +1876,12 @@
 		if (interruptible)
 			ret = wait_event_interruptible(ring->irq_queue,
 				i915_seqno_passed(
-					ring->get_gem_seqno(dev, ring), seqno)
+					ring->get_seqno(dev, ring), seqno)
 				|| atomic_read(&dev_priv->mm.wedged));
 		else
 			wait_event(ring->irq_queue,
 				i915_seqno_passed(
-					ring->get_gem_seqno(dev, ring), seqno)
+					ring->get_seqno(dev, ring), seqno)
 				|| atomic_read(&dev_priv->mm.wedged));
 
 		ring->user_irq_put(dev, ring);
@@ -1899,7 +1894,7 @@
 
 	if (ret && ret != -ERESTARTSYS)
 		DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
-			  __func__, ret, seqno, ring->get_gem_seqno(dev, ring),
+			  __func__, ret, seqno, ring->get_seqno(dev, ring),
 			  dev_priv->next_seqno);
 
 	/* Directly dispatch request retiring.  While we have the work queue
@@ -3384,28 +3379,48 @@
  * relatively low latency when blocking on a particular request to finish.
  */
 static int
-i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
+i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 {
-	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
-	int ret = 0;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
+	struct drm_i915_gem_request *request;
+	struct intel_ring_buffer *ring = NULL;
+	u32 seqno = 0;
+	int ret;
 
-	mutex_lock(&dev->struct_mutex);
-	while (!list_empty(&i915_file_priv->mm.request_list)) {
-		struct drm_i915_gem_request *request;
-
-		request = list_first_entry(&i915_file_priv->mm.request_list,
-					   struct drm_i915_gem_request,
-					   client_list);
-
+	mutex_lock(&file_priv->mutex);
+	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
 		if (time_after_eq(request->emitted_jiffies, recent_enough))
 			break;
 
-		ret = i915_wait_request(dev, request->seqno, request->ring);
-		if (ret != 0)
-			break;
+		ring = request->ring;
+		seqno = request->seqno;
 	}
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&file_priv->mutex);
+
+	if (seqno == 0)
+		return 0;
+
+	ret = 0;
+	if (!i915_seqno_passed(ring->get_seqno(dev, ring), seqno)) {
+		/* And wait for the seqno passing without holding any locks and
+		 * causing extra latency for others. This is safe as the irq
+		 * generation is designed to be run atomically and so is
+		 * lockless.
+		 */
+		ring->user_irq_get(dev, ring);
+		ret = wait_event_interruptible(ring->irq_queue,
+					       i915_seqno_passed(ring->get_seqno(dev, ring), seqno)
+					       || atomic_read(&dev_priv->mm.wedged));
+		ring->user_irq_put(dev, ring);
+
+		if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
+			ret = -EIO;
+	}
+
+	if (ret == 0)
+		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
 	return ret;
 }
@@ -4857,17 +4872,26 @@
 	return 0;
 }
 
-void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
+void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 {
-	struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
 
 	/* Clean up our request list when the client is going away, so that
 	 * later retire_requests won't dereference our soon-to-be-gone
 	 * file_priv.
 	 */
 	mutex_lock(&dev->struct_mutex);
-	while (!list_empty(&i915_file_priv->mm.request_list))
-		list_del_init(i915_file_priv->mm.request_list.next);
+	mutex_lock(&file_priv->mutex);
+	while (!list_empty(&file_priv->mm.request_list)) {
+		struct drm_i915_gem_request *request;
+
+		request = list_first_entry(&file_priv->mm.request_list,
+					   struct drm_i915_gem_request,
+					   client_list);
+		list_del(&request->client_list);
+		request->file_priv = NULL;
+	}
+	mutex_unlock(&file_priv->mutex);
 	mutex_unlock(&dev->struct_mutex);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d4c053e..245a07e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -327,16 +327,16 @@
 	}
 
 	if (gt_iir & GT_PIPE_NOTIFY) {
-		u32 seqno = render_ring->get_gem_seqno(dev, render_ring);
+		u32 seqno = render_ring->get_seqno(dev, render_ring);
 		render_ring->irq_gem_seqno = seqno;
 		trace_i915_gem_request_complete(dev, seqno);
-		DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
+		wake_up_all(&dev_priv->render_ring.irq_queue);
 		dev_priv->hangcheck_count = 0;
 		mod_timer(&dev_priv->hangcheck_timer,
 			  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
 	}
 	if (gt_iir & bsd_usr_interrupt)
-		DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue);
+		wake_up_all(&dev_priv->bsd_ring.irq_queue);
 
 	if (de_iir & DE_GSE)
 		intel_opregion_gse_intr(dev);
@@ -573,7 +573,8 @@
 		return;
 	}
 
-	error->seqno = i915_get_gem_seqno(dev, &dev_priv->render_ring);
+	error->seqno =
+	       	dev_priv->render_ring.get_seqno(dev, &dev_priv->render_ring);
 	error->eir = I915_READ(EIR);
 	error->pgtbl_er = I915_READ(PGTBL_ER);
 	error->pipeastat = I915_READ(PIPEASTAT);
@@ -873,7 +874,9 @@
 		/*
 		 * Wakeup waiting processes so they don't hang
 		 */
-		DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
+		wake_up_all(&dev_priv->render_ring.irq_queue);
+		if (HAS_BSD(dev))
+			wake_up_all(&dev_priv->bsd_ring.irq_queue);
 	}
 
 	queue_work(dev_priv->wq, &dev_priv->error_work);
@@ -1012,18 +1015,17 @@
 		}
 
 		if (iir & I915_USER_INTERRUPT) {
-			u32 seqno =
-				render_ring->get_gem_seqno(dev, render_ring);
+			u32 seqno = render_ring->get_seqno(dev, render_ring);
 			render_ring->irq_gem_seqno = seqno;
 			trace_i915_gem_request_complete(dev, seqno);
-			DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
+			wake_up_all(&dev_priv->render_ring.irq_queue);
 			dev_priv->hangcheck_count = 0;
 			mod_timer(&dev_priv->hangcheck_timer,
 				  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
 		}
 
 		if (HAS_BSD(dev) && (iir & I915_BSD_USER_INTERRUPT))
-			DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue);
+			wake_up_all(&dev_priv->bsd_ring.irq_queue);
 
 		if (iir & I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT) {
 			intel_prepare_page_flip(dev, 0);
@@ -1333,9 +1335,8 @@
 
 	/* If all work is done then ACTHD clearly hasn't advanced. */
 	if (list_empty(&dev_priv->render_ring.request_list) ||
-		i915_seqno_passed(i915_get_gem_seqno(dev,
-				&dev_priv->render_ring),
-			i915_get_tail_request(dev)->seqno)) {
+		i915_seqno_passed(dev_priv->render_ring.get_seqno(dev, &dev_priv->render_ring),
+				  i915_get_tail_request(dev)->seqno)) {
 		bool missed_wakeup = false;
 
 		dev_priv->hangcheck_count = 0;
@@ -1343,13 +1344,13 @@
 		/* Issue a wake-up to catch stuck h/w. */
 		if (dev_priv->render_ring.waiting_gem_seqno &&
 		    waitqueue_active(&dev_priv->render_ring.irq_queue)) {
-			DRM_WAKEUP(&dev_priv->render_ring.irq_queue);
+			wake_up_all(&dev_priv->render_ring.irq_queue);
 			missed_wakeup = true;
 		}
 
 		if (dev_priv->bsd_ring.waiting_gem_seqno &&
 		    waitqueue_active(&dev_priv->bsd_ring.irq_queue)) {
-			DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue);
+			wake_up_all(&dev_priv->bsd_ring.irq_queue);
 			missed_wakeup = true;
 		}
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4843d02..00214c1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4983,7 +4983,7 @@
 	/* Initial scanout buffer will have a 0 pending flip count */
 	if ((atomic_read(&obj_priv->pending_flip) == 0) ||
 	    atomic_dec_and_test(&obj_priv->pending_flip))
-		DRM_WAKEUP(&dev_priv->pending_flip_queue);
+		wake_up(&dev_priv->pending_flip_queue);
 	schedule_work(&work->work);
 
 	trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1bcea7c..9b848be 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -239,7 +239,6 @@
 static u32
 render_ring_add_request(struct drm_device *dev,
 			struct intel_ring_buffer *ring,
-			struct drm_file *file_priv,
 			u32 flush_domains)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -303,8 +302,8 @@
 }
 
 static u32
-render_ring_get_gem_seqno(struct drm_device *dev,
-			  struct intel_ring_buffer *ring)
+render_ring_get_seqno(struct drm_device *dev,
+		      struct intel_ring_buffer *ring)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	if (HAS_PIPE_CONTROL(dev))
@@ -390,7 +389,6 @@
 static u32
 bsd_ring_add_request(struct drm_device *dev,
 		     struct intel_ring_buffer *ring,
-		     struct drm_file *file_priv,
 		     u32 flush_domains)
 {
 	u32 seqno;
@@ -432,8 +430,8 @@
 }
 
 static u32
-bsd_ring_get_gem_seqno(struct drm_device *dev,
-		       struct intel_ring_buffer *ring)
+bsd_ring_get_seqno(struct drm_device *dev,
+		   struct intel_ring_buffer *ring)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
@@ -773,7 +771,7 @@
 	.get_active_head	= render_ring_get_active_head,
 	.flush			= render_ring_flush,
 	.add_request		= render_ring_add_request,
-	.get_gem_seqno		= render_ring_get_gem_seqno,
+	.get_seqno		= render_ring_get_seqno,
 	.user_irq_get		= render_ring_get_user_irq,
 	.user_irq_put		= render_ring_put_user_irq,
 	.dispatch_gem_execbuffer = render_ring_dispatch_gem_execbuffer,
@@ -792,7 +790,7 @@
 	.get_active_head	= bsd_ring_get_active_head,
 	.flush			= bsd_ring_flush,
 	.add_request		= bsd_ring_add_request,
-	.get_gem_seqno		= bsd_ring_get_gem_seqno,
+	.get_seqno		= bsd_ring_get_seqno,
 	.user_irq_get		= bsd_ring_get_user_irq,
 	.user_irq_put		= bsd_ring_put_user_irq,
 	.dispatch_gem_execbuffer = bsd_ring_dispatch_gem_execbuffer,
@@ -883,7 +881,7 @@
        .get_active_head		= gen6_bsd_ring_get_active_head,
        .flush			= gen6_bsd_ring_flush,
        .add_request		= bsd_ring_add_request,
-       .get_gem_seqno		= bsd_ring_get_gem_seqno,
+       .get_seqno		= bsd_ring_get_seqno,
        .user_irq_get		= bsd_ring_get_user_irq,
        .user_irq_put		= bsd_ring_put_user_irq,
        .dispatch_gem_execbuffer	= gen6_bsd_ring_dispatch_gem_execbuffer,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ce52101..d506da1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -58,10 +58,9 @@
 			u32	flush_domains);
 	u32		(*add_request)(struct drm_device *dev,
 			struct intel_ring_buffer *ring,
-			struct drm_file *file_priv,
 			u32 flush_domains);
-	u32		(*get_gem_seqno)(struct drm_device *dev,
-			struct intel_ring_buffer *ring);
+	u32		(*get_seqno)(struct drm_device *dev,
+				     struct intel_ring_buffer *ring);
 	int		(*dispatch_gem_execbuffer)(struct drm_device *dev,
 			struct intel_ring_buffer *ring,
 			struct drm_i915_gem_execbuffer2 *exec,