mmc: block: Error handling fixes and support of reset_all

This does following:

1. Adds support to handle multiple cmdq software requests timeout.
   Currently we schedule error handling work for the first timedout
   request. We requeue all pending tasks, knock off all tasks from
   device queue and reset card and controller as part of this, then
   for all other software requests timeout, if it comes while executing
   error work, we return BLK_EH_NOT_HANDLED.
   This fixes BUG_ON in case of multiple requests timesout together.

2. Current code resets CQE, power cycle the card and requeue all the
   requests in case of any error.

3. mmc_blk_cmdq_err work takes care freeing up all resource allocation
   like clk and rpm hold, in case of reset_all.

4. Currently we were never clearing error CMDQ_STATE_ERR in case of any
   error. This patch takes care of this bug.

Change-Id: I83d483c11fe2d7f2e462086cc3c0932057304c0d
Signed-off-by: Ritesh Harjani <riteshh@codeaurora.org>
[subhashj@codeaurora.org: fixed compilation error]
Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
[xiaonian@codeaurora.org: fixed merge conflicts]
Signed-off-by: Xiaonian Wang <xiaonian@codeaurora.org>
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 14e4d57..5a91f9c 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -3275,24 +3275,122 @@
 
 static void mmc_blk_cmdq_reset(struct mmc_host *host, bool clear_all)
 {
-	if (!host->cmdq_ops->reset)
-		return;
+	int err = 0;
 
-	if (!test_bit(CMDQ_STATE_HALT, &host->cmdq_ctx.curr_state)) {
-		if (mmc_cmdq_halt(host, true)) {
-			pr_err("%s: halt failed\n", mmc_hostname(host));
-			goto reset;
-		}
+	if (mmc_cmdq_halt(host, true)) {
+		pr_err("%s: halt failed\n", mmc_hostname(host));
+		goto reset;
 	}
 
 	if (clear_all)
 		mmc_cmdq_discard_queue(host, 0);
 reset:
-	mmc_hw_reset(host);
 	mmc_host_clk_hold(host);
-	host->cmdq_ops->reset(host, true);
+	host->cmdq_ops->disable(host, true);
 	mmc_host_clk_release(host);
-	clear_bit(CMDQ_STATE_HALT, &host->cmdq_ctx.curr_state);
+	err = mmc_cmdq_hw_reset(host);
+	if (err && err != -EOPNOTSUPP) {
+		pr_err("%s: failed to cmdq_hw_reset err = %d\n",
+				mmc_hostname(host), err);
+		mmc_host_clk_hold(host);
+		host->cmdq_ops->enable(host);
+		mmc_host_clk_release(host);
+		mmc_cmdq_halt(host, false);
+		goto out;
+	}
+	/*
+	 * CMDQ HW reset would have already made CQE
+	 * in unhalted state, but reflect the same
+	 * in software state of cmdq_ctx.
+	 */
+	mmc_host_clr_halt(host);
+out:
+	return;
+}
+
+/**
+ * is_cmdq_dcmd_req - Checks if tag belongs to DCMD request.
+ * @q:		request_queue pointer.
+ * @tag:	tag number of request to check.
+ *
+ * This function checks if the request with tag number "tag"
+ * is a DCMD request or not based on cmdq_req_flags set.
+ *
+ * returns true if DCMD req, otherwise false.
+ */
+static bool is_cmdq_dcmd_req(struct request_queue *q, int tag)
+{
+	struct request *req;
+	struct mmc_queue_req *mq_rq;
+	struct mmc_cmdq_req *cmdq_req;
+
+	req = blk_queue_find_tag(q, tag);
+	if (WARN_ON(!req))
+		goto out;
+	mq_rq = req->special;
+	if (WARN_ON(!mq_rq))
+		goto out;
+	cmdq_req = &(mq_rq->cmdq_req);
+	return (cmdq_req->cmdq_req_flags & DCMD);
+out:
+	return -ENOENT;
+}
+
+/**
+ * mmc_blk_cmdq_reset_all - Reset everything for CMDQ block request.
+ * @host:	mmc_host pointer.
+ * @err:	error for which reset is performed.
+ *
+ * This function implements reset_all functionality for
+ * cmdq. It resets the controller, power cycle the card,
+ * and invalidate all busy tags(requeue all request back to
+ * elevator).
+ */
+static void mmc_blk_cmdq_reset_all(struct mmc_host *host, int err)
+{
+	struct mmc_request *mrq = host->err_mrq;
+	struct mmc_card *card = host->card;
+	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
+	struct request_queue *q;
+	int itag = 0;
+	int ret = 0;
+
+	if (WARN_ON(!mrq))
+		return;
+
+	q = mrq->req->q;
+	WARN_ON(!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
+
+	#ifdef CONFIG_MMC_CLKGATE
+	pr_debug("%s: %s: active_reqs = %lu, clk_requests = %d\n",
+			mmc_hostname(host), __func__,
+			ctx_info->active_reqs, host->clk_requests);
+	#endif
+
+	mmc_blk_cmdq_reset(host, false);
+
+	for_each_set_bit(itag, &ctx_info->active_reqs,
+			host->num_cq_slots) {
+		ret = is_cmdq_dcmd_req(q, itag);
+		if (WARN_ON(ret == -ENOENT))
+			continue;
+		if (!ret) {
+			WARN_ON(!test_and_clear_bit(itag,
+				 &ctx_info->data_active_reqs));
+		} else {
+			clear_bit(CMDQ_STATE_DCMD_ACTIVE,
+					&ctx_info->curr_state);
+		}
+		mmc_cmdq_post_req(host, itag, err);
+		WARN_ON(!test_and_clear_bit(itag,
+					&ctx_info->active_reqs));
+		mmc_host_clk_release(host);
+		mmc_put_card(card);
+	}
+
+	spin_lock_irq(q->queue_lock);
+	blk_queue_invalidate_tags(q);
+	spin_unlock_irq(q->queue_lock);
 }
 
 static void mmc_blk_cmdq_shutdown(struct mmc_queue *mq)
@@ -3335,6 +3433,7 @@
 	struct mmc_queue_req *mq_rq = req->special;
 	struct mmc_request *mrq;
 	struct mmc_cmdq_req *cmdq_req;
+	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
 
 	BUG_ON(!host);
 
@@ -3360,32 +3459,40 @@
 	else
 		mrq->data->error = -ETIMEDOUT;
 
-	BUG_ON(host->err_mrq != NULL);
-	host->err_mrq = mrq;
+	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state) ||
+		test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
+		return BLK_EH_NOT_HANDLED;
 
-	mmc_host_clk_release(mrq->host);
+	set_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
 	return BLK_EH_HANDLED;
 }
 
+/*
+ * mmc_blk_cmdq_err: error handling of cmdq error requests.
+ * Function should be called in context of error out request
+ * which has claim_host and rpm acquired.
+ * This may be called with CQ engine halted. Make sure to
+ * unhalt it after error recovery.
+ *
+ * TODO: Currently cmdq error handler does reset_all in case
+ * of any erorr. Need to optimize error handling.
+ */
 static void mmc_blk_cmdq_err(struct mmc_queue *mq)
 {
-	int err;
-	int retry = 0;
-	int gen_err;
-	u32 status;
-
 	struct mmc_host *host = mq->card->host;
 	struct mmc_request *mrq = host->err_mrq;
-	struct mmc_card *card = mq->card;
 	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
-	struct request_queue *q = mrq->req->q;
-	int tag = mrq->req->tag;
+	struct request_queue *q;
+	int err;
 
-	pm_runtime_get_sync(&card->dev);
 	mmc_host_clk_hold(host);
 	host->cmdq_ops->dumpstate(host);
 	mmc_host_clk_release(host);
 
+	if (WARN_ON(!mrq))
+		return;
+
+	q = mrq->req->q;
 	err = mmc_cmdq_halt(host, true);
 	if (err) {
 		pr_err("halt: failed: %d\n", err);
@@ -3394,74 +3501,44 @@
 
 	/* RED error - Fatal: requires reset */
 	if (mrq->cmdq_req->resp_err) {
+		err = mrq->cmdq_req->resp_err;
 		pr_crit("%s: Response error detected: Device in bad state\n",
 			mmc_hostname(host));
-		blk_end_request_all(mrq->req, -EIO);
 		goto reset;
 	}
 
-	if (mrq->data && mrq->data->error) {
-		blk_end_request_all(mrq->req, mrq->data->error);
-		for (; retry < MAX_RETRIES; retry++) {
-			err = get_card_status(card, &status, 0);
-			if (!err)
-				break;
-		}
+	/*
+	 * In case of software request time-out, we schedule err work only for
+	 * the first error out request and handles all other request in flight
+	 * here.
+	 */
+	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state)) {
+		err = -ETIMEDOUT;
+	} else if (mrq->data && mrq->data->error) {
+		err = mrq->data->error;
+	} else if (mrq->cmd && mrq->cmd->error) {
+		/* DCMD commands */
+		err = mrq->cmd->error;
 
-		if (err) {
-			pr_err("%s: No response from card !!!\n",
-			       mmc_hostname(host));
-			goto reset;
-		}
-
-		if (R1_CURRENT_STATE(status) == R1_STATE_DATA ||
-		    R1_CURRENT_STATE(status) == R1_STATE_RCV) {
-			err =  send_stop(card, MMC_CMDQ_STOP_TIMEOUT_MS,
-					 mrq->req, &gen_err, &status);
-			if (err) {
-				pr_err("%s: error %d sending stop (%d) command\n",
-					mmc_hostname(host),
-					err, status);
-				goto reset;
-			}
-		}
-
-		if (mmc_cmdq_discard_queue(host, tag))
-			goto reset;
-		else
-			goto unhalt;
-	}
-
-	/* DCMD commands */
-	if (mrq->cmd && mrq->cmd->error) {
 		/*
 		 * Notify completion for non flush commands like discard
 		 * that wait for DCMD finish.
 		 */
 		if (!(mrq->req->cmd_flags & REQ_PREFLUSH)) {
 			complete(&mrq->completion);
-			goto reset;
 		}
-		clear_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx_info->curr_state);
-		blk_end_request_all(mrq->req, mrq->cmd->error);
 	}
 
 reset:
-	spin_lock_irq(mq->queue->queue_lock);
-	blk_queue_invalidate_tags(q);
-	spin_unlock_irq(mq->queue->queue_lock);
-	mmc_blk_cmdq_reset(host, true);
-	goto out;
-
-unhalt:
+	mmc_blk_cmdq_reset_all(host, err);
+	if (mrq->cmdq_req->resp_err)
+		mrq->cmdq_req->resp_err = false;
 	mmc_cmdq_halt(host, false);
 
-out:
 	host->err_mrq = NULL;
-	pm_runtime_mark_last_busy(&card->dev);
-	clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
+	clear_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
+	WARN_ON(!test_and_clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
 	wake_up(&ctx_info->wait);
-	__mmc_put_card(card);
 }
 
 /* invoked by block layer in softirq context */
@@ -3481,31 +3558,38 @@
 	else if (mrq->data && mrq->data->error)
 		err = mrq->data->error;
 
-	/* clear pending request */
-	BUG_ON(!test_and_clear_bit(cmdq_req->tag,
-				   &ctx_info->active_reqs));
-
-	if (cmdq_req->cmdq_req_flags & DCMD)
-		is_dcmd = true;
-	else
-		BUG_ON(!test_and_clear_bit(cmdq_req->tag,
-			 &ctx_info->data_active_reqs));
-
-	mmc_cmdq_post_req(host, cmdq_req->tag, err);
-	if (err) {
-		pr_err("%s: %s: txfr error: %d\n", mmc_hostname(mrq->host),
-		       __func__, err);
+	if (err || cmdq_req->resp_err) {
+		pr_err("%s: %s: txfr error(%d)/resp_err(%d)\n",
+				mmc_hostname(mrq->host), __func__, err,
+				cmdq_req->resp_err);
 		if (test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
 			pr_err("%s: CQ in error state, ending current req: %d\n",
 				__func__, err);
-			blk_end_request_all(rq, err);
 		} else {
 			set_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
+			BUG_ON(host->err_mrq != NULL);
+			host->err_mrq = mrq;
 			schedule_work(&mq->cmdq_err_work);
 		}
 		goto out;
 	}
+	/*
+	 * In case of error CMDQ is expected to be either in halted
+	 * or disable state so cannot receive any completion of
+	 * other requests.
+	 */
+	BUG_ON(test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
 
+	/* clear pending request */
+	BUG_ON(!test_and_clear_bit(cmdq_req->tag,
+				   &ctx_info->active_reqs));
+	if (cmdq_req->cmdq_req_flags & DCMD)
+		is_dcmd = true;
+	else
+		BUG_ON(!test_and_clear_bit(cmdq_req->tag,
+					 &ctx_info->data_active_reqs));
+
+	mmc_cmdq_post_req(host, cmdq_req->tag, err);
 	if (cmdq_req->cmdq_req_flags & DCMD) {
 		clear_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx_info->curr_state);
 		blk_end_request_all(rq, err);
@@ -3517,10 +3601,11 @@
 out:
 
 	mmc_cmdq_clk_scaling_stop_busy(host, true, is_dcmd);
-	if (!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
+	if (!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
 		wake_up(&ctx_info->wait);
+		mmc_put_card(host->card);
+	}
 
-	mmc_put_card(host->card);
 	if (!ctx_info->active_reqs)
 		wake_up_interruptible(&host->cmdq_ctx.queue_empty_wq);
 
@@ -3535,7 +3620,6 @@
 {
 	struct request *req = mrq->req;
 
-	mmc_host_clk_release(mrq->host);
 	blk_complete_request(req);
 }
 EXPORT_SYMBOL(mmc_blk_cmdq_req_done);