drm/i915: implement fastpath for overlay flip waiting

As long as the gpu can keep up, neither the cpu (waiting for gpu)
nore the gpu (waiting for vblank to do an overlay flip) stalls.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Eric Anholt <eric@anholt.net>
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ce03fd5..fd6362e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -808,6 +808,9 @@
 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
 		     unsigned long end);
 int i915_gem_idle(struct drm_device *dev);
+uint32_t i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
+			  uint32_t flush_domains);
+int i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible);
 int i915_lp_ring_sync(struct drm_device *dev);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7d1e9ad..5e579a4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1583,7 +1583,7 @@
  *
  * Returned sequence numbers are nonzero on success.
  */
-static uint32_t
+uint32_t
 i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 		 uint32_t flush_domains)
 {
@@ -1820,7 +1820,7 @@
 	mutex_unlock(&dev->struct_mutex);
 }
 
-static int
+int
 i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index c9b1b97..5b503cb 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -126,7 +126,9 @@
 	u32 flip_addr;
 	struct drm_i915_gem_object *reg_bo;
 	void *virt_addr;
+	/* flip handling */
 	int hw_wedged;
+	uint32_t last_flip_req;
 };
 
 struct intel_crtc {
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 4e88abb..85e07e4 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -251,7 +251,6 @@
         drm_i915_private_t *dev_priv = dev->dev_private;
 	u32 flip_addr = overlay->flip_addr;
 	u32 tmp;
-	int ret;
 	RING_LOCALS;
 
 	BUG_ON(!overlay->active);
@@ -264,11 +263,40 @@
 	if (tmp & (1 << 17))
 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
 
-	BEGIN_LP_RING(6);
+	BEGIN_LP_RING(4);
 	OUT_RING(MI_FLUSH);
 	OUT_RING(MI_NOOP);
 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
 	OUT_RING(flip_addr);
+        ADVANCE_LP_RING();
+
+	overlay->last_flip_req = i915_add_request(dev, NULL, 0);
+}
+
+static int intel_overlay_wait_flip(struct intel_overlay *overlay)
+{
+	struct drm_device *dev = overlay->dev;
+        drm_i915_private_t *dev_priv = dev->dev_private;
+	int ret;
+	u32 tmp;
+	RING_LOCALS;
+
+	if (overlay->last_flip_req != 0) {
+		ret = i915_do_wait_request(dev, overlay->last_flip_req, 0);
+
+		if (ret != 0)
+			return ret;
+
+		overlay->last_flip_req = 0;
+
+		tmp = I915_READ(ISR);
+
+		if (!(tmp & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT))
+			return 0;
+	}
+
+	/* synchronous slowpath */
+	BEGIN_LP_RING(2);
         OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
         OUT_RING(MI_NOOP);
         ADVANCE_LP_RING();
@@ -279,13 +307,8 @@
 		DRM_ERROR("intel overlay: ring sync failed, hw likely wedged\n");
 		overlay->hw_wedged = 1;
 	}
-}
 
-static int intel_overlay_wait_flip(struct intel_overlay *overlay)
-{
-	/* don't overcomplicate things for now with asynchronous operations
-	 * see comment above */
-	return 0;
+	return ret;
 }
 
 /* overlay needs to be disabled in OCMD reg */
@@ -344,7 +367,9 @@
 	return ret;
 }
 
-/* wait for pending overlay flip and release old frame */
+/* Wait for pending overlay flip and release old frame.
+ * Needs to be called before the overlay register are changed
+ * via intel_overlay_(un)map_regs_atomic */
 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 {
 	int ret;