Merge "drm: msm: sde: add api to get if dma wait is needed"
diff --git a/drivers/gpu/drm/msm/sde/sde_encoder.c b/drivers/gpu/drm/msm/sde/sde_encoder.c
index edefa2d..7d1e4bf 100644
--- a/drivers/gpu/drm/msm/sde/sde_encoder.c
+++ b/drivers/gpu/drm/msm/sde/sde_encoder.c
@@ -2625,6 +2625,29 @@
 
 	pending_flush = 0x0;
 
+	/*
+	 * Trigger the LUT DMA flush; this may need to wait for completion,
+	 * so it must be done outside of the atomic context below.
+	 */
+	for (i = 0; i < sde_enc->num_phys_encs; i++) {
+		struct sde_encoder_phys *phys = sde_enc->phys_encs[i];
+		bool wait_for_dma = false;
+
+		if (!phys || phys->enable_state == SDE_ENC_DISABLED)
+			continue;
+
+		ctl = phys->hw_ctl;
+		if (!ctl)
+			continue;
+
+		if (phys->ops.wait_dma_trigger)
+			wait_for_dma = phys->ops.wait_dma_trigger(phys);
+
+		if (ctl->ops.reg_dma_flush)
+			ctl->ops.reg_dma_flush(ctl, wait_for_dma);
+	}
+
 	/* update pending counts and trigger kickoff ctl flush atomically */
 	spin_lock_irqsave(&sde_enc->enc_spinlock, lock_flags);
 
@@ -2652,8 +2675,7 @@
 				phys->split_role == ENC_ROLE_SLAVE) &&
 				phys->split_role != ENC_ROLE_SKIP)
 			set_bit(i, sde_enc->frame_busy_mask);
-		if (phys->hw_ctl->ops.reg_dma_flush)
-			phys->hw_ctl->ops.reg_dma_flush(phys->hw_ctl);
+
 		if (!phys->ops.needs_single_flush ||
 				!phys->ops.needs_single_flush(phys))
 			_sde_encoder_trigger_flush(&sde_enc->base, phys, 0x0);
diff --git a/drivers/gpu/drm/msm/sde/sde_encoder_phys.h b/drivers/gpu/drm/msm/sde/sde_encoder_phys.h
index b1aa696..e5a4da4 100644
--- a/drivers/gpu/drm/msm/sde/sde_encoder_phys.h
+++ b/drivers/gpu/drm/msm/sde/sde_encoder_phys.h
@@ -132,6 +132,8 @@
  * @is_autorefresh_enabled:	provides the autorefresh current
  *                              enable/disable state.
  * @get_line_count:		Obtain current vertical line count
+ * @wait_dma_trigger:		Returns true if the LUT DMA trigger has to wait
+ *                              until the transaction is complete.
  */
 
 struct sde_encoder_phys_ops {
@@ -176,6 +178,7 @@
 	void (*restore)(struct sde_encoder_phys *phys);
 	bool (*is_autorefresh_enabled)(struct sde_encoder_phys *phys);
 	int (*get_line_count)(struct sde_encoder_phys *phys);
+	bool (*wait_dma_trigger)(struct sde_encoder_phys *phys);
 };
 
 /**
diff --git a/drivers/gpu/drm/msm/sde/sde_encoder_phys_vid.c b/drivers/gpu/drm/msm/sde/sde_encoder_phys_vid.c
index 1b30814..a983b7c 100644
--- a/drivers/gpu/drm/msm/sde/sde_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/sde/sde_encoder_phys_vid.c
@@ -591,6 +591,37 @@
 	return ret;
 }
 
+static bool sde_encoder_phys_vid_wait_dma_trigger(
+		struct sde_encoder_phys *phys_enc)
+{
+	struct sde_encoder_phys_vid *vid_enc;
+	struct sde_hw_intf *intf;
+	struct sde_hw_ctl *ctl;
+	struct intf_status status;
+
+	if (!phys_enc) {
+		SDE_ERROR("invalid encoder\n");
+		return false;
+	}
+
+	vid_enc = to_sde_encoder_phys_vid(phys_enc);
+	intf = vid_enc->hw_intf;
+	ctl = phys_enc->hw_ctl;
+	if (!intf || !ctl) {
+		SDE_ERROR("invalid hw_intf %d hw_ctl %d\n",
+			intf != NULL, ctl != NULL);
+		return false;
+	}
+
+	if (!intf->ops.get_status)
+		return false;
+
+	intf->ops.get_status(intf, &status);
+
+	/* if interface is not enabled, return true to wait for dma trigger */
+	return !status.is_en;
+}
+
 static void sde_encoder_phys_vid_enable(struct sde_encoder_phys *phys_enc)
 {
 	struct msm_drm_private *priv;
@@ -945,6 +976,7 @@
 	ops->trigger_flush = sde_encoder_helper_trigger_flush;
 	ops->hw_reset = sde_encoder_helper_hw_reset;
 	ops->get_line_count = sde_encoder_phys_vid_get_line_count;
+	ops->wait_dma_trigger = sde_encoder_phys_vid_wait_dma_trigger;
 }
 
 struct sde_encoder_phys *sde_encoder_phys_vid_init(
diff --git a/drivers/gpu/drm/msm/sde/sde_hw_ctl.c b/drivers/gpu/drm/msm/sde/sde_hw_ctl.c
index 3df7474..88f821d 100644
--- a/drivers/gpu/drm/msm/sde/sde_hw_ctl.c
+++ b/drivers/gpu/drm/msm/sde/sde_hw_ctl.c
@@ -565,12 +565,13 @@
 	SDE_REG_WRITE(c, CTL_ROT_TOP, val);
 }
 
-static void sde_hw_reg_dma_flush(struct sde_hw_ctl *ctx)
+static void sde_hw_reg_dma_flush(struct sde_hw_ctl *ctx, bool blocking)
 {
 	struct sde_hw_reg_dma_ops *ops = sde_reg_dma_get_ops();
 
 	if (ops && ops->last_command)
-		ops->last_command(ctx, DMA_CTL_QUEUE0);
+		ops->last_command(ctx, DMA_CTL_QUEUE0,
+		    (blocking ? REG_DMA_WAIT4_COMP : REG_DMA_NOWAIT));
 }
 
 static void _setup_ctl_ops(struct sde_hw_ctl_ops *ops,
diff --git a/drivers/gpu/drm/msm/sde/sde_hw_ctl.h b/drivers/gpu/drm/msm/sde/sde_hw_ctl.h
index 97bc1c1..bad80f0 100644
--- a/drivers/gpu/drm/msm/sde/sde_hw_ctl.h
+++ b/drivers/gpu/drm/msm/sde/sde_hw_ctl.h
@@ -214,8 +214,9 @@
 	/**
 	 * Flush the reg dma by sending last command.
 	 * @ctx       : ctl path ctx pointer
+	 * @blocking  : if set to true, the call blocks until the flush is done
 	 */
-	void (*reg_dma_flush)(struct sde_hw_ctl *ctx);
+	void (*reg_dma_flush)(struct sde_hw_ctl *ctx, bool blocking);
 
 };
 
diff --git a/drivers/gpu/drm/msm/sde/sde_hw_reg_dma_v1.c b/drivers/gpu/drm/msm/sde/sde_hw_reg_dma_v1.c
index d7b7625..3326aa2 100644
--- a/drivers/gpu/drm/msm/sde/sde_hw_reg_dma_v1.c
+++ b/drivers/gpu/drm/msm/sde/sde_hw_reg_dma_v1.c
@@ -9,11 +9,13 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
+#include <linux/iopoll.h>
 #include "sde_hw_mdss.h"
 #include "sde_hw_ctl.h"
 #include "sde_hw_reg_dma_v1.h"
 #include "msm_drv.h"
 #include "msm_mmu.h"
+#include "sde_dbg.h"
 
 #define GUARD_BYTES (BIT(8) - 1)
 #define ALIGNED_OFFSET (U32_MAX & ~(GUARD_BYTES))
@@ -93,6 +95,20 @@
 	[PCC] = GRP_DSPP_HW_BLK_SELECT,
 };
 
+static u32 ctl_trigger_done_mask[CTL_MAX][DMA_CTL_QUEUE_MAX] = {
+	[CTL_0][0] = BIT(16),
+	[CTL_0][1] = BIT(21),
+	[CTL_1][0] = BIT(17),
+	[CTL_1][1] = BIT(22),
+	[CTL_2][0] = BIT(18),
+	[CTL_2][1] = BIT(23),
+	[CTL_3][0] = BIT(19),
+	[CTL_3][1] = BIT(24),
+};
+
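+/*
+ * Interrupt status/clear register offsets for the REG_DMA block, set in
+ * init_v1(); the per-CTL/queue trigger-done bits above are cleared before
+ * kickoff and polled in last_cmd_v1() when a blocking flush is requested.
+ */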
+static int reg_dma_int_status_off;
+static int reg_dma_clear_status_off;
+
 static int validate_dma_cfg(struct sde_reg_dma_setup_ops_cfg *cfg);
 static int validate_write_decode_sel(struct sde_reg_dma_setup_ops_cfg *cfg);
 static int validate_write_reg(struct sde_reg_dma_setup_ops_cfg *cfg);
@@ -110,7 +126,8 @@
 static int setup_payload_v1(struct sde_reg_dma_setup_ops_cfg *cfg);
 static int kick_off_v1(struct sde_reg_dma_kickoff_cfg *cfg);
 static int reset_v1(struct sde_hw_ctl *ctl);
-static int last_cmd_v1(struct sde_hw_ctl *ctl, enum sde_reg_dma_queue q);
+static int last_cmd_v1(struct sde_hw_ctl *ctl, enum sde_reg_dma_queue q,
+		enum sde_reg_dma_last_cmd_mode mode);
 static struct sde_reg_dma_buffer *alloc_reg_dma_buf_v1(u32 size);
 static int dealloc_reg_dma_v1(struct sde_reg_dma_buffer *lut_buf);
 
@@ -130,7 +147,7 @@
 	[REG_BLK_WRITE_MULTIPLE] = validate_write_multi_lut_reg,
 };
 
-static struct sde_reg_dma_buffer *last_cmd_buf;
+static struct sde_reg_dma_buffer *last_cmd_buf[CTL_MAX];
 
 static void get_decode_sel(unsigned long blk, u32 *decode_sel)
 {
@@ -466,6 +483,8 @@
 
 	SET_UP_REG_DMA_REG(hw, reg_dma);
 	SDE_REG_WRITE(&hw, REG_DMA_OP_MODE_OFF, BIT(0));
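+	/* clear any stale trigger-done status before queueing the new buffer */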
+	SDE_REG_WRITE(&hw, reg_dma_clear_status_off,
+		ctl_trigger_done_mask[cfg->ctl->idx][cfg->queue_select]);
 	SDE_REG_WRITE(&hw, reg_dma_ctl_queue_off[cfg->ctl->idx],
 			cfg->dma_buf->iova);
 	SDE_REG_WRITE(&hw, reg_dma_ctl_queue_off[cfg->ctl->idx] + 0x4,
@@ -479,17 +498,32 @@
 
 int init_v1(struct sde_hw_reg_dma *cfg)
 {
-	int i = 0;
+	int i = 0, rc = 0;
 
 	if (!cfg)
 		return -EINVAL;
 
 	reg_dma = cfg;
-	if (!last_cmd_buf) {
-		last_cmd_buf = alloc_reg_dma_buf_v1(REG_DMA_HEADERS_BUFFER_SZ);
-		if (IS_ERR_OR_NULL(last_cmd_buf))
-			return -EINVAL;
+	for (i = CTL_0; i < CTL_MAX; i++) {
+		if (!last_cmd_buf[i]) {
+			last_cmd_buf[i] =
+			    alloc_reg_dma_buf_v1(REG_DMA_HEADERS_BUFFER_SZ);
+			if (IS_ERR_OR_NULL(last_cmd_buf[i])) {
+				rc = -EINVAL;
+				break;
+			}
+		}
 	}
+	if (rc) {
+		for (i = CTL_0; i < CTL_MAX; i++) {
+			if (!last_cmd_buf[i])
+				continue;
+			dealloc_reg_dma_v1(last_cmd_buf[i]);
+			last_cmd_buf[i] = NULL;
+		}
+		return rc;
+	}
+
 	reg_dma->ops.check_support = check_support_v1;
 	reg_dma->ops.setup_payload = setup_payload_v1;
 	reg_dma->ops.kick_off = kick_off_v1;
@@ -503,6 +537,8 @@
 	for (i = CTL_1; i < ARRAY_SIZE(reg_dma_ctl_queue_off); i++)
 		reg_dma_ctl_queue_off[i] = reg_dma_ctl_queue_off[i - 1] +
 			(sizeof(u32) * 4);
+	reg_dma_int_status_off = 0x90;
+	reg_dma_clear_status_off = 0xa0;
 
 	return 0;
 }
@@ -767,24 +803,28 @@
 	return 0;
 }
 
-static int last_cmd_v1(struct sde_hw_ctl *ctl, enum sde_reg_dma_queue q)
+static int last_cmd_v1(struct sde_hw_ctl *ctl, enum sde_reg_dma_queue q,
+		enum sde_reg_dma_last_cmd_mode mode)
 {
 	struct sde_reg_dma_setup_ops_cfg cfg;
 	struct sde_reg_dma_kickoff_cfg kick_off;
+	struct sde_hw_blk_reg_map hw;
+	u32 val;
+	int rc;
 
-	if (!last_cmd_buf || !ctl || q >= DMA_CTL_QUEUE_MAX) {
-		DRM_ERROR("invalid param buf %pK ctl %pK q %d\n", last_cmd_buf,
-				ctl, q);
+	if (!ctl || ctl->idx >= CTL_MAX || q >= DMA_CTL_QUEUE_MAX) {
+		DRM_ERROR("ctl %pK q %d index %d\n", ctl, q,
+				((ctl) ? ctl->idx : -1));
 		return -EINVAL;
 	}
 
-	if (!last_cmd_buf->iova) {
-		DRM_DEBUG("iova not set, possible secure session\n");
+	if (!last_cmd_buf[ctl->idx] || !last_cmd_buf[ctl->idx]->iova) {
+		DRM_DEBUG("invalid last cmd buf for idx %d\n", ctl->idx);
 		return 0;
 	}
 
-	cfg.dma_buf = last_cmd_buf;
-	reset_reg_dma_buffer_v1(last_cmd_buf);
+	cfg.dma_buf = last_cmd_buf[ctl->idx];
+	reset_reg_dma_buffer_v1(last_cmd_buf[ctl->idx]);
 	if (validate_last_cmd(&cfg)) {
 		DRM_ERROR("validate buf failed\n");
 		return -EINVAL;
@@ -800,18 +840,37 @@
 	kick_off.trigger_mode = WRITE_IMMEDIATE;
 	kick_off.last_command = 1;
 	kick_off.op = REG_DMA_WRITE;
-	kick_off.dma_buf = last_cmd_buf;
+	kick_off.dma_buf = last_cmd_buf[ctl->idx];
 	if (kick_off_v1(&kick_off)) {
 		DRM_ERROR("kick off last cmd failed\n");
 		return -EINVAL;
 	}
 
+	memset(&hw, 0, sizeof(hw));
+	SET_UP_REG_DMA_REG(hw, reg_dma);
+
+	SDE_EVT32(SDE_EVTLOG_FUNC_ENTRY, mode);
+	if (mode == REG_DMA_WAIT4_COMP) {
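+		/* poll (at ~10us intervals, up to 20ms) for the trigger-done bit */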
+		rc = readl_poll_timeout(hw.base_off + hw.blk_off +
+			reg_dma_int_status_off, val,
+			(val & ctl_trigger_done_mask[ctl->idx][q]),
+			10, 20000);
+		if (rc)
+			DRM_ERROR("poll wait failed %d val %x mask %x\n",
+			    rc, val, ctl_trigger_done_mask[ctl->idx][q]);
+		SDE_EVT32(SDE_EVTLOG_FUNC_EXIT, mode);
+	}
+
 	return 0;
 }
 
 void deinit_v1(void)
 {
-	if (last_cmd_buf)
-		dealloc_reg_dma_v1(last_cmd_buf);
-	last_cmd_buf = NULL;
+	int i = 0;
+
+	for (i = CTL_0; i < CTL_MAX; i++) {
+		if (last_cmd_buf[i])
+			dealloc_reg_dma_v1(last_cmd_buf[i]);
+		last_cmd_buf[i] = NULL;
+	}
 }
diff --git a/drivers/gpu/drm/msm/sde/sde_reg_dma.c b/drivers/gpu/drm/msm/sde/sde_reg_dma.c
index a52abd9..1bef4b8 100644
--- a/drivers/gpu/drm/msm/sde/sde_reg_dma.c
+++ b/drivers/gpu/drm/msm/sde/sde_reg_dma.c
@@ -64,7 +64,7 @@
 }
 
 static int default_last_command(struct sde_hw_ctl *ctl,
-		enum sde_reg_dma_queue q)
+		enum sde_reg_dma_queue q, enum sde_reg_dma_last_cmd_mode mode)
 {
 	return 0;
 }
diff --git a/drivers/gpu/drm/msm/sde/sde_reg_dma.h b/drivers/gpu/drm/msm/sde/sde_reg_dma.h
index b9d7843..41a292a 100644
--- a/drivers/gpu/drm/msm/sde/sde_reg_dma.h
+++ b/drivers/gpu/drm/msm/sde/sde_reg_dma.h
@@ -169,6 +169,18 @@
 };
 
 /**
+ * enum sde_reg_dma_last_cmd_mode - defines the last_command kickoff modes.
+ * @REG_DMA_WAIT4_COMP: last_command api will wait for up to 20 msec for the
+ *			reg dma trigger to complete.
+ * @REG_DMA_NOWAIT: last_command api will not wait for reg dma trigger
+ *		    completion.
+ */
+enum sde_reg_dma_last_cmd_mode {
+	REG_DMA_WAIT4_COMP,
+	REG_DMA_NOWAIT,
+};
+
+/**
  * struct sde_reg_dma_buffer - defines reg dma buffer structure.
  * @drm_gem_object *buf: drm gem handle for the buffer
  * @asapce : pointer to address space
@@ -265,7 +277,8 @@
 	struct sde_reg_dma_buffer* (*alloc_reg_dma_buf)(u32 size);
 	int (*dealloc_reg_dma)(struct sde_reg_dma_buffer *lut_buf);
 	int (*reset_reg_dma_buf)(struct sde_reg_dma_buffer *buf);
-	int (*last_command)(struct sde_hw_ctl *ctl, enum sde_reg_dma_queue q);
+	int (*last_command)(struct sde_hw_ctl *ctl, enum sde_reg_dma_queue q,
+			enum sde_reg_dma_last_cmd_mode mode);
 };
 
 /**