mmc: cmdq_hci: Add CQE WA for CQE HW bugs

This adds workarounds (WA) for a few CQE errors
which can cause HALT to fail or leave the CQTERRI
register without any valid error info.
For example, there is currently a HW bug where, in
case of an ADMA error, CQE does not stop and the
CQTERRI register is not updated with valid values.
HALT will also most likely fail in case of an ADMA error.

Thus a possible WA for this is to disable CQE,
do reset_all and requeue all the requests in flight.

Change-Id: I5aaddbb7bec1de7dbc959144dc2f1a5ad16789ff
Signed-off-by: Ritesh Harjani <riteshh@codeaurora.org>
diff --git a/drivers/mmc/host/cmdq_hci.c b/drivers/mmc/host/cmdq_hci.c
index 5069dc7..8a56d1f 100644
--- a/drivers/mmc/host/cmdq_hci.c
+++ b/drivers/mmc/host/cmdq_hci.c
@@ -37,6 +37,7 @@
 #define HALT_TIMEOUT_MS 1000
 
 static int cmdq_halt_poll(struct mmc_host *mmc);
+static int cmdq_halt(struct mmc_host *mmc, bool halt);
 
 #ifdef CONFIG_PM_RUNTIME
 static int cmdq_runtime_pm_get(struct cmdq_host *host)
@@ -172,6 +173,27 @@
 	pr_err("-------------------------\n");
 }
 
+/* Dump the ADMA transfer descriptors of each tag set in data_active_reqs. */
+static void cmdq_dump_adma_mem(struct cmdq_host *cq_host)
+{
+	struct mmc_host *mmc = cq_host->mmc;
+	unsigned long active = mmc->cmdq_ctx.data_active_reqs;
+	unsigned long len = mmc->max_segs * cq_host->trans_desc_len;
+	dma_addr_t phys;
+	int slot = 0;
+
+	/* Walk a local snapshot of the bitmap, limited to num_slots bits. */
+	for_each_set_bit(slot, &active, cq_host->num_slots) {
+		phys = get_trans_desc_dma(cq_host, slot);
+		pr_err("%s: %s: tag = %d, trans_dma(phys) = %pad, trans_desc(virt) = 0x%p\n",
+				mmc_hostname(mmc), __func__, slot, &phys,
+				get_trans_desc(cq_host, slot));
+		/* Hex dump: 32 bytes per row in 8-byte groups, no ASCII column. */
+		print_hex_dump(KERN_ERR, "cmdq-adma:", DUMP_PREFIX_ADDRESS, 32,
+				8, get_trans_desc(cq_host, slot), len, false);
+	}
+}
+
 static void cmdq_dumpregs(struct cmdq_host *cq_host)
 {
 	struct mmc_host *mmc = cq_host->mmc;
@@ -737,6 +759,7 @@
 	unsigned long err_info = 0;
 	struct mmc_request *mrq;
 	int ret;
+	u32 dbr_set = 0;
 
 	status = cmdq_readl(cq_host, CQIS);
 	cmdq_writel(cq_host, status, CQIS);
@@ -762,6 +785,43 @@
 					mmc_hostname(mmc), __func__, ret);
 		cmdq_dumpregs(cq_host);
 
+		if (!err_info) {
+			/*
+			 * For a few errors (like ADMA), the HW sometimes
+			 * cannot provide any CQTERRI info.
+			 * The code below is a HW WA to recover from such a
+			 * scenario:
+			 * - Halt/disable CQE and do reset_all.
+			 *   Since there is no way to know which tag
+			 *   caused the error, pick the first bit set in
+			 *   the doorbell register and fail that request.
+			 */
+			dbr_set = cmdq_readl(cq_host, CQTDBR);
+			if (!dbr_set) {
+				pr_err("%s: spurious/force error interrupt\n",
+						mmc_hostname(mmc));
+				cmdq_halt(mmc, false);
+				mmc_host_clr_halt(mmc);
+				return IRQ_HANDLED;
+			}
+
+			tag = ffs(dbr_set) - 1;
+			pr_err("%s: error tag selected: tag = %lu\n",
+					mmc_hostname(mmc), tag);
+			mrq = get_req_by_tag(cq_host, tag);
+			if (mrq->data)
+				mrq->data->error = err;
+			else
+				mrq->cmd->error = err;
+			/*
+			 * Get ADMA descriptor memory in case of ADMA
+			 * error for debug.
+			 */
+			if (err == -EIO)
+				cmdq_dump_adma_mem(cq_host);
+			goto skip_cqterri;
+		}
+
 		if (err_info & CQ_RMEFV) {
 			tag = GET_CMD_ERR_TAG(err_info);
 			pr_err("%s: CMD err tag: %lu\n", __func__, tag);
@@ -779,6 +839,14 @@
 			mrq->data->error = err;
 		}
 
+skip_cqterri:
+		/*
+		 * If the CQE halt failed, disable CQE to stop it
+		 * from processing any further requests.
+		 */
+		if (ret)
+			cmdq_disable(mmc, true);
+
 		/*
 		 * CQE detected a response error from device
 		 * In most cases, this would require a reset.