drm/i915: Only emit a flush request on the active ring.

When flushing the GPU domains, we emit a flush on *both* rings, even
though they share a unified cache. Only emit the flush on the rings that
the objects being flushed are actually active on.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
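---

As a review aid (not part of the patch to be applied), here is a
standalone, compilable C model of the bookkeeping introduced below: each
object that is active on a ring contributes that ring's id to a mask, and
only the rings present in the mask are flushed.  The names here
(model_gem_flush, flush_ring, GPU_DOMAINS) are invented for the sketch and
do not exist in the driver; only the mask logic mirrors the hunks that
follow.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the new enum intel_ring_id added to intel_ringbuffer.h below. */
enum intel_ring_id {
	RING_RENDER = 0x1,
	RING_BSD = 0x2,
};

/* Stand-in for I915_GEM_GPU_DOMAINS (the union of the GPU cache domains). */
#define GPU_DOMAINS 0x3e

/*
 * Stand-in for i915_gem_flush_ring(): the real helper emits the flush on
 * the given ring and then processes that ring's flushing list.
 */
static void flush_ring(const char *name)
{
	printf("flushing %s\n", name);
}

/*
 * Models the reworked i915_gem_flush(): only the rings named in
 * flush_rings are flushed, instead of unconditionally flushing both.
 */
static void model_gem_flush(uint32_t invalidate_domains,
			    uint32_t flush_domains,
			    uint32_t flush_rings)
{
	if ((flush_domains | invalidate_domains) & GPU_DOMAINS) {
		if (flush_rings & RING_RENDER)
			flush_ring("render ring");
		if (flush_rings & RING_BSD)
			flush_ring("bsd ring");
	}
}

int main(void)
{
	/*
	 * Per-execbuffer accumulation, as in
	 * i915_gem_object_set_to_gpu_domain(): each object active on a
	 * ring ORs that ring's id into dev_priv->mm.flush_rings.
	 */
	uint32_t flush_rings = 0;

	flush_rings |= RING_RENDER;	/* e.g. the batch on the render ring */

	model_gem_flush(GPU_DOMAINS, GPU_DOMAINS, flush_rings);
	return 0;
}
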
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4b6aeb5..ed09846 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -623,6 +623,8 @@
 
 		/* storage for physical objects */
 		struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
+
+		uint32_t flush_rings;
 	} mm;
 	struct sdvo_device_mapping sdvo_mappings[2];
 	/* indicate whether the LVDS_BORDER should be enabled or not */
@@ -1014,9 +1016,6 @@
 			 bool interruptible,
 			 struct intel_ring_buffer *ring);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
-void i915_gem_process_flushing_list(struct drm_device *dev,
-				    uint32_t flush_domains,
-				    struct intel_ring_buffer *ring);
 int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
 				      int write);
 int i915_gem_object_set_to_display_plane(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1c02798..cf27655 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1567,7 +1567,7 @@
 	i915_verify_inactive(dev, __FILE__, __LINE__);
 }
 
-void
+static void
 i915_gem_process_flushing_list(struct drm_device *dev,
 			       uint32_t flush_domains,
 			       struct intel_ring_buffer *ring)
@@ -1880,23 +1880,36 @@
 }
 
 static void
+i915_gem_flush_ring(struct drm_device *dev,
+		    struct intel_ring_buffer *ring,
+		    uint32_t invalidate_domains,
+		    uint32_t flush_domains)
+{
+	ring->flush(dev, ring, invalidate_domains, flush_domains);
+	i915_gem_process_flushing_list(dev, flush_domains, ring);
+}
+
+static void
 i915_gem_flush(struct drm_device *dev,
 	       uint32_t invalidate_domains,
-	       uint32_t flush_domains)
+	       uint32_t flush_domains,
+	       uint32_t flush_rings)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
 		drm_agp_chipset_flush(dev);
 
-	dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
-			invalidate_domains,
-			flush_domains);
-
-	if (HAS_BSD(dev))
-		dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
-				invalidate_domains,
-				flush_domains);
+	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
+		if (flush_rings & RING_RENDER)
+			i915_gem_flush_ring(dev,
+					    &dev_priv->render_ring,
+					    invalidate_domains, flush_domains);
+		if (flush_rings & RING_BSD)
+			i915_gem_flush_ring(dev,
+					    &dev_priv->bsd_ring,
+					    invalidate_domains, flush_domains);
+	}
 }
 
 /**
@@ -2022,7 +2035,9 @@
 		return 0;
 
 	/* Flush everything onto the inactive list. */
-	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	i915_gem_flush_ring(dev,
+			    &dev_priv->render_ring,
+			    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 
 	ret = i915_wait_request(dev,
 				i915_gem_next_request_seqno(dev, &dev_priv->render_ring),
@@ -2031,6 +2046,10 @@
 		return ret;
 
 	if (HAS_BSD(dev)) {
+		i915_gem_flush_ring(dev,
+				    &dev_priv->bsd_ring,
+				    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+
 		ret = i915_wait_request(dev,
 					i915_gem_next_request_seqno(dev, &dev_priv->bsd_ring),
 					&dev_priv->bsd_ring);
@@ -2598,7 +2617,9 @@
 
 	/* Queue the GPU write cache flushing we need. */
 	old_write_domain = obj->write_domain;
-	i915_gem_flush(dev, 0, obj->write_domain);
+	i915_gem_flush_ring(dev,
+			    to_intel_bo(obj)->ring,
+			    0, obj->write_domain);
 	BUG_ON(obj->write_domain);
 
 	trace_i915_gem_object_change_domain(obj,
@@ -2908,6 +2929,7 @@
 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
 {
 	struct drm_device		*dev = obj->dev;
+	struct drm_i915_private		*dev_priv = dev->dev_private;
 	struct drm_i915_gem_object	*obj_priv = to_intel_bo(obj);
 	uint32_t			invalidate_domains = 0;
 	uint32_t			flush_domains = 0;
@@ -2972,6 +2994,8 @@
 
 	dev->invalidate_domains |= invalidate_domains;
 	dev->flush_domains |= flush_domains;
+	if (obj_priv->ring)
+		dev_priv->mm.flush_rings |= obj_priv->ring->id;
 #if WATCH_BUF
 	DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
 		 __func__,
@@ -3684,6 +3708,7 @@
 	 */
 	dev->invalidate_domains = 0;
 	dev->flush_domains = 0;
+	dev_priv->mm.flush_rings = 0;
 
 	for (i = 0; i < args->buffer_count; i++) {
 		struct drm_gem_object *obj = object_list[i];
@@ -3703,7 +3728,8 @@
 #endif
 		i915_gem_flush(dev,
 			       dev->invalidate_domains,
-			       dev->flush_domains);
+			       dev->flush_domains,
+			       dev_priv->mm.flush_rings);
 	}
 
 	if (dev_priv->render_ring.outstanding_lazy_request) {
@@ -4170,8 +4196,10 @@
 		 * use this buffer rather sooner than later, so issuing the required
 		 * flush earlier is beneficial.
 		 */
-		if (obj->write_domain) {
-			i915_gem_flush(dev, 0, obj->write_domain);
+		if (obj->write_domain & I915_GEM_GPU_DOMAINS) {
+			i915_gem_flush_ring(dev,
+					    obj_priv->ring,
+					    0, obj->write_domain);
 			(void)i915_add_request(dev, file_priv, NULL, obj_priv->ring);
 		}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 670f94a..45f66e2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -116,8 +116,6 @@
 		intel_ring_emit(dev, ring, MI_NOOP);
 		intel_ring_advance(dev, ring);
 	}
-
-	i915_gem_process_flushing_list(dev, flush_domains, ring);
 }
 
 static unsigned int render_ring_get_head(struct drm_device *dev,
@@ -386,8 +384,6 @@
 	intel_ring_emit(dev, ring, MI_FLUSH);
 	intel_ring_emit(dev, ring, MI_NOOP);
 	intel_ring_advance(dev, ring);
-
-	i915_gem_process_flushing_list(dev, flush_domains, ring);
 }
 
 static inline unsigned int bsd_ring_get_head(struct drm_device *dev,
@@ -799,6 +795,7 @@
 
 struct intel_ring_buffer render_ring = {
 	.name			= "render ring",
+	.id			= RING_RENDER,
 	.regs                   = {
 		.ctl = PRB0_CTL,
 		.head = PRB0_HEAD,
@@ -836,6 +833,7 @@
 
 struct intel_ring_buffer bsd_ring = {
 	.name                   = "bsd ring",
+	.id			= RING_BSD,
 	.regs			= {
 		.ctl = BSD_RING_CTL,
 		.head = BSD_RING_HEAD,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index fa5d84f..8dc0e62 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -10,6 +10,10 @@
 struct drm_i915_gem_execbuffer2;
 struct  intel_ring_buffer {
 	const char	*name;
+	enum intel_ring_id {
+		RING_RENDER = 0x1,
+		RING_BSD = 0x2,
+	} id;
 	struct		ring_regs {
 			u32 ctl;
 			u32 head;