drm/msm: Use customized function to wait for atomic commit done

MDP FLUSH registers could indicate if the previous flush updates
has taken effect at vsync boundary. Making use of this H/W feature
can catch the vsync that happened between CRTC atomic_flush and
*_wait_for_vblanks, to avoid unnecessary wait.

This change allows kms CRTCs to use their own *_wait_for_commit_done
functions to wait for FLUSH register cleared at vsync, before commit
completion.

Signed-off-by: Hai Li <hali@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
index 73afa21..c4bb9d9 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
@@ -51,6 +51,11 @@
 	/* if there is a pending flip, these will be non-null: */
 	struct drm_pending_vblank_event *event;
 
+	/* Bits have been flushed at the last commit,
+	 * used to decide if a vsync has happened since last commit.
+	 */
+	u32 flushed_mask;
+
 #define PENDING_CURSOR 0x1
 #define PENDING_FLIP   0x2
 	atomic_t pending;
@@ -93,6 +98,8 @@
 
 	DBG("%s: flush=%08x", mdp4_crtc->name, flush);
 
+	mdp4_crtc->flushed_mask = flush;
+
 	mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush);
 }
 
@@ -537,6 +544,29 @@
 	crtc_flush(crtc);
 }
 
+static void mdp4_crtc_wait_for_flush_done(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	int ret;
+
+	ret = drm_crtc_vblank_get(crtc);
+	if (ret)
+		return;
+
+	ret = wait_event_timeout(dev->vblank[drm_crtc_index(crtc)].queue,
+		!(mdp4_read(mdp4_kms, REG_MDP4_OVERLAY_FLUSH) &
+			mdp4_crtc->flushed_mask),
+		msecs_to_jiffies(50));
+	if (ret <= 0)
+		dev_warn(dev->dev, "vblank time out, crtc=%d\n", mdp4_crtc->id);
+
+	mdp4_crtc->flushed_mask = 0;
+
+	drm_crtc_vblank_put(crtc);
+}
+
 uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc)
 {
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
@@ -600,6 +630,15 @@
 	mdp4_write(mdp4_kms, REG_MDP4_DISP_INTF_SEL, intf_sel);
 }
 
+void mdp4_crtc_wait_for_commit_done(struct drm_crtc *crtc)
+{
+	/* wait_for_flush_done is the only case for now.
+	 * Later we will have command mode CRTC to wait for
+	 * other event.
+	 */
+	mdp4_crtc_wait_for_flush_done(crtc);
+}
+
 static const char *dma_names[] = {
 		"DMA_P", "DMA_S", "DMA_E",
 };
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
index 88a75cb..531e4ac 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
@@ -159,6 +159,12 @@
 	mdp4_disable(mdp4_kms);
 }
 
+static void mdp4_wait_for_crtc_commit_done(struct msm_kms *kms,
+						struct drm_crtc *crtc)
+{
+	mdp4_crtc_wait_for_commit_done(crtc);
+}
+
 static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate,
 		struct drm_encoder *encoder)
 {
@@ -197,6 +203,7 @@
 		.disable_vblank  = mdp4_disable_vblank,
 		.prepare_commit  = mdp4_prepare_commit,
 		.complete_commit = mdp4_complete_commit,
+		.wait_for_crtc_commit_done = mdp4_wait_for_crtc_commit_done,
 		.get_format      = mdp_get_format,
 		.round_pixclk    = mdp4_round_pixclk,
 		.preclose        = mdp4_preclose,
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
index a537c07..c1ecb9d 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
@@ -206,6 +206,7 @@
 void mdp4_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file);
 void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config);
 void mdp4_crtc_set_intf(struct drm_crtc *crtc, enum mdp4_intf intf, int mixer);
+void mdp4_crtc_wait_for_commit_done(struct drm_crtc *crtc);
 struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
 		struct drm_plane *plane, int id, int ovlp_id,
 		enum mdp4_dma dma_id);
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 13dd7dd..672ffd4 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -46,6 +46,11 @@
 	/* if there is a pending flip, these will be non-null: */
 	struct drm_pending_vblank_event *event;
 
+	/* Bits have been flushed at the last commit,
+	 * used to decide if a vsync has happened since last commit.
+	 */
+	u32 flushed_mask;
+
 #define PENDING_CURSOR 0x1
 #define PENDING_FLIP   0x2
 	atomic_t pending;
@@ -82,12 +87,12 @@
 	mdp_irq_register(&get_kms(crtc)->base, &mdp5_crtc->vblank);
 }
 
-static void crtc_flush(struct drm_crtc *crtc, u32 flush_mask)
+static u32 crtc_flush(struct drm_crtc *crtc, u32 flush_mask)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
 
 	DBG("%s: flush=%08x", mdp5_crtc->name, flush_mask);
-	mdp5_ctl_commit(mdp5_crtc->ctl, flush_mask);
+	return mdp5_ctl_commit(mdp5_crtc->ctl, flush_mask);
 }
 
 /*
@@ -95,7 +100,7 @@
  * so that we can safely queue unref to current fb (ie. next
  * vblank we know hw is done w/ previous scanout_fb).
  */
-static void crtc_flush_all(struct drm_crtc *crtc)
+static u32 crtc_flush_all(struct drm_crtc *crtc)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
 	struct drm_plane *plane;
@@ -103,7 +108,7 @@
 
 	/* this should not happen: */
 	if (WARN_ON(!mdp5_crtc->ctl))
-		return;
+		return 0;
 
 	drm_atomic_crtc_for_each_plane(plane, crtc) {
 		flush_mask |= mdp5_plane_get_flush(plane);
@@ -111,7 +116,7 @@
 
 	flush_mask |= mdp_ctl_flush_mask_lm(mdp5_crtc->lm);
 
-	crtc_flush(crtc, flush_mask);
+	return crtc_flush(crtc, flush_mask);
 }
 
 /* if file!=NULL, this is preclose potential cancel-flip path */
@@ -395,7 +400,9 @@
 		return;
 
 	blend_setup(crtc);
-	crtc_flush_all(crtc);
+
+	mdp5_crtc->flushed_mask = crtc_flush_all(crtc);
+
 	request_pending(crtc, PENDING_FLIP);
 }
 
@@ -600,6 +607,32 @@
 	DBG("%s: error: %08x", mdp5_crtc->name, irqstatus);
 }
 
+static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	int ret;
+
+	/* Should not call this function if crtc is disabled. */
+	if (!mdp5_crtc->ctl)
+		return;
+
+	ret = drm_crtc_vblank_get(crtc);
+	if (ret)
+		return;
+
+	ret = wait_event_timeout(dev->vblank[drm_crtc_index(crtc)].queue,
+		((mdp5_ctl_get_commit_status(mdp5_crtc->ctl) &
+		mdp5_crtc->flushed_mask) == 0),
+		msecs_to_jiffies(50));
+	if (ret <= 0)
+		dev_warn(dev->dev, "vblank time out, crtc=%d\n", mdp5_crtc->id);
+
+	mdp5_crtc->flushed_mask = 0;
+
+	drm_crtc_vblank_put(crtc);
+}
+
 uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
@@ -631,6 +664,7 @@
 		mdp5_crtc->vblank.irqmask = lm2ppdone(lm);
 	else
 		mdp5_crtc->vblank.irqmask = intf2vblank(lm, intf);
+
 	mdp_irq_update(&mdp5_kms->base);
 
 	mdp5_ctl_set_intf(mdp5_crtc->ctl, intf);
@@ -648,6 +682,15 @@
 	return WARN_ON(!crtc) ? NULL : mdp5_crtc->ctl;
 }
 
+void mdp5_crtc_wait_for_commit_done(struct drm_crtc *crtc)
+{
+	/* wait_for_flush_done is the only case for now.
+	 * Later we will have command mode CRTC to wait for
+	 * other event.
+	 */
+	mdp5_crtc_wait_for_flush_done(crtc);
+}
+
 /* initialize crtc */
 struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 		struct drm_plane *plane, int id)
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
index 5488b68..f2530f2 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
@@ -392,8 +392,10 @@
  * CTL registers need to be flushed in some circumstances; if that is the
  * case, some trigger bits will be present in both flush mask and
  * ctl->pending_ctl_trigger.
+ *
+ * Return H/W flushed bit mask.
  */
-int mdp5_ctl_commit(struct mdp5_ctl *ctl, u32 flush_mask)
+u32 mdp5_ctl_commit(struct mdp5_ctl *ctl, u32 flush_mask)
 {
 	struct mdp5_ctl_manager *ctl_mgr = ctl->ctlm;
 	struct op_mode *pipeline = &ctl->pipeline;
@@ -424,7 +426,12 @@
 		refill_start_mask(ctl);
 	}
 
-	return 0;
+	return flush_mask;
+}
+
+u32 mdp5_ctl_get_commit_status(struct mdp5_ctl *ctl)
+{
+	return ctl_read(ctl, REG_MDP5_CTL_FLUSH(ctl->id));
 }
 
 void mdp5_ctl_release(struct mdp5_ctl *ctl)
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h
index 7a62000..4678228 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h
@@ -88,7 +88,8 @@
 u32 mdp_ctl_flush_mask_encoder(struct mdp5_interface *intf);
 
 /* @flush_mask: see CTL flush masks definitions below */
-int mdp5_ctl_commit(struct mdp5_ctl *ctl, u32 flush_mask);
+u32 mdp5_ctl_commit(struct mdp5_ctl *ctl, u32 flush_mask);
+u32 mdp5_ctl_get_commit_status(struct mdp5_ctl *ctl);
 
 void mdp5_ctl_release(struct mdp5_ctl *ctl);
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index bbacf9d..206f758 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -80,6 +80,12 @@
 	mdp5_disable(mdp5_kms);
 }
 
+static void mdp5_wait_for_crtc_commit_done(struct msm_kms *kms,
+						struct drm_crtc *crtc)
+{
+	mdp5_crtc_wait_for_commit_done(crtc);
+}
+
 static long mdp5_round_pixclk(struct msm_kms *kms, unsigned long rate,
 		struct drm_encoder *encoder)
 {
@@ -141,6 +147,7 @@
 		.disable_vblank  = mdp5_disable_vblank,
 		.prepare_commit  = mdp5_prepare_commit,
 		.complete_commit = mdp5_complete_commit,
+		.wait_for_crtc_commit_done = mdp5_wait_for_crtc_commit_done,
 		.get_format      = mdp_get_format,
 		.round_pixclk    = mdp5_round_pixclk,
 		.set_split_display = mdp5_set_split_display,
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
index 2c0de17..e0eb245 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
@@ -237,6 +237,7 @@
 struct mdp5_ctl *mdp5_crtc_get_ctl(struct drm_crtc *crtc);
 void mdp5_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file);
 void mdp5_crtc_set_intf(struct drm_crtc *crtc, struct mdp5_interface *intf);
+void mdp5_crtc_wait_for_commit_done(struct drm_crtc *crtc);
 struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 		struct drm_plane *plane, int id);
 
diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index 5b19212..84c0a72 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c
@@ -84,6 +84,33 @@
 	kfree(c);
 }
 
+static void msm_atomic_wait_for_commit_done(struct drm_device *dev,
+		struct drm_atomic_state *old_state)
+{
+	struct drm_crtc *crtc;
+	struct msm_drm_private *priv = old_state->dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	int ncrtcs = old_state->dev->mode_config.num_crtc;
+	int i;
+
+	for (i = 0; i < ncrtcs; i++) {
+		crtc = old_state->crtcs[i];
+
+		if (!crtc)
+			continue;
+
+		if (!crtc->state->enable)
+			continue;
+
+		/* Legacy cursor ioctls are completely unsynced, and userspace
+		 * relies on that (by doing tons of cursor updates). */
+		if (old_state->legacy_cursor_update)
+			continue;
+
+		kms->funcs->wait_for_crtc_commit_done(kms, crtc);
+	}
+}
+
 /* The (potentially) asynchronous part of the commit.  At this point
  * nothing can fail short of armageddon.
  */
@@ -115,7 +142,7 @@
 	 * not be critical path)
 	 */
 
-	drm_atomic_helper_wait_for_vblanks(dev, state);
+	msm_atomic_wait_for_commit_done(dev, state);
 
 	drm_atomic_helper_cleanup_planes(dev, state);
 
@@ -139,7 +166,6 @@
 	c->fence = max(c->fence, msm_gem_fence(to_msm_bo(obj), MSM_PREP_READ));
 }
 
-
 int msm_atomic_check(struct drm_device *dev,
 		     struct drm_atomic_state *state)
 {
diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
index a9f17bd..9bcabaa 100644
--- a/drivers/gpu/drm/msm/msm_kms.h
+++ b/drivers/gpu/drm/msm/msm_kms.h
@@ -43,6 +43,9 @@
 	/* modeset, bracketing atomic_commit(): */
 	void (*prepare_commit)(struct msm_kms *kms, struct drm_atomic_state *state);
 	void (*complete_commit)(struct msm_kms *kms, struct drm_atomic_state *state);
+	/* functions to wait for atomic commit completed on each CRTC */
+	void (*wait_for_crtc_commit_done)(struct msm_kms *kms,
+					struct drm_crtc *crtc);
 	/* misc: */
 	const struct msm_format *(*get_format)(struct msm_kms *kms, uint32_t format);
 	long (*round_pixclk)(struct msm_kms *kms, unsigned long rate,