mmc: block: Error handling fixes and support of reset_all This does following: 1. Adds support to handle multiple cmdq software requests timeout. Currently we schedule error handling work for the first timedout request. We requeue all pending tasks, knock off all tasks from device queue and reset card and controller as part of this, then for all other software requests timeout, if it comes while executing error work, we return BLK_EH_NOT_HANDLED. This fixes BUG_ON in case of multiple requests timesout together. 2. Current code resets CQE, power cycle the card and requeue all the requests in case of any error. 3. mmc_blk_cmdq_err work takes care freeing up all resource allocation like clk and rpm hold, in case of reset_all. 4. Currently we were never clearing error CMDQ_STATE_ERR in case of any error. This patch takes care of this bug. Change-Id: I83d483c11fe2d7f2e462086cc3c0932057304c0d Signed-off-by: Ritesh Harjani <riteshh@codeaurora.org> [subhashj@codeaurora.org: fixed compilation error] Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org> [xiaonian@codeaurora.org: fixed merge conflicts] Signed-off-by: Xiaonian Wang <xiaonian@codeaurora.org>

commit: d7e5be8b22317af74bd53faa4698a718121c748e [log] [tgz]
author: Ritesh Harjani <riteshh@codeaurora.org> Wed Oct 07 16:42:47 2015 +0530
committer: Xiaonian Wang <xiaonian@codeaurora.org> Fri Apr 07 17:31:48 2017 -0700
tree: a067013141ed66138e58bb1d425f1904570c1cfb
parent: c2f6cf0a21b1da8f0cb74cd7dfc5e41cf88e133f [diff] [blame]
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 14e4d57..5a91f9c 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c

@@ -3275,24 +3275,122 @@
 
 static void mmc_blk_cmdq_reset(struct mmc_host *host, bool clear_all)
 {
-	if (!host->cmdq_ops->reset)
-		return;
+	int err = 0;
 
-	if (!test_bit(CMDQ_STATE_HALT, &host->cmdq_ctx.curr_state)) {
-		if (mmc_cmdq_halt(host, true)) {
-			pr_err("%s: halt failed\n", mmc_hostname(host));
-			goto reset;
-		}
+	if (mmc_cmdq_halt(host, true)) {
+		pr_err("%s: halt failed\n", mmc_hostname(host));
+		goto reset;
 	}
 
 	if (clear_all)
 		mmc_cmdq_discard_queue(host, 0);
 reset:
-	mmc_hw_reset(host);
 	mmc_host_clk_hold(host);
-	host->cmdq_ops->reset(host, true);
+	host->cmdq_ops->disable(host, true);
 	mmc_host_clk_release(host);
-	clear_bit(CMDQ_STATE_HALT, &host->cmdq_ctx.curr_state);
+	err = mmc_cmdq_hw_reset(host);
+	if (err && err != -EOPNOTSUPP) {
+		pr_err("%s: failed to cmdq_hw_reset err = %d\n",
+				mmc_hostname(host), err);
+		mmc_host_clk_hold(host);
+		host->cmdq_ops->enable(host);
+		mmc_host_clk_release(host);
+		mmc_cmdq_halt(host, false);
+		goto out;
+	}
+	/*
+	 * CMDQ HW reset would have already made CQE
+	 * in unhalted state, but reflect the same
+	 * in software state of cmdq_ctx.
+	 */
+	mmc_host_clr_halt(host);
+out:
+	return;
+}
+
+/**
+ * is_cmdq_dcmd_req - Checks if tag belongs to DCMD request.
+ * @q:		request_queue pointer.
+ * @tag:	tag number of request to check.
+ *
+ * This function checks if the request with tag number "tag"
+ * is a DCMD request or not based on cmdq_req_flags set.
+ *
+ * returns true if DCMD req, otherwise false.
+ */
+static bool is_cmdq_dcmd_req(struct request_queue *q, int tag)
+{
+	struct request *req;
+	struct mmc_queue_req *mq_rq;
+	struct mmc_cmdq_req *cmdq_req;
+
+	req = blk_queue_find_tag(q, tag);
+	if (WARN_ON(!req))
+		goto out;
+	mq_rq = req->special;
+	if (WARN_ON(!mq_rq))
+		goto out;
+	cmdq_req = &(mq_rq->cmdq_req);
+	return (cmdq_req->cmdq_req_flags & DCMD);
+out:
+	return -ENOENT;
+}
+
+/**
+ * mmc_blk_cmdq_reset_all - Reset everything for CMDQ block request.
+ * @host:	mmc_host pointer.
+ * @err:	error for which reset is performed.
+ *
+ * This function implements reset_all functionality for
+ * cmdq. It resets the controller, power cycle the card,
+ * and invalidate all busy tags(requeue all request back to
+ * elevator).
+ */
+static void mmc_blk_cmdq_reset_all(struct mmc_host *host, int err)
+{
+	struct mmc_request *mrq = host->err_mrq;
+	struct mmc_card *card = host->card;
+	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
+	struct request_queue *q;
+	int itag = 0;
+	int ret = 0;
+
+	if (WARN_ON(!mrq))
+		return;
+
+	q = mrq->req->q;
+	WARN_ON(!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
+
+	#ifdef CONFIG_MMC_CLKGATE
+	pr_debug("%s: %s: active_reqs = %lu, clk_requests = %d\n",
+			mmc_hostname(host), __func__,
+			ctx_info->active_reqs, host->clk_requests);
+	#endif
+
+	mmc_blk_cmdq_reset(host, false);
+
+	for_each_set_bit(itag, &ctx_info->active_reqs,
+			host->num_cq_slots) {
+		ret = is_cmdq_dcmd_req(q, itag);
+		if (WARN_ON(ret == -ENOENT))
+			continue;
+		if (!ret) {
+			WARN_ON(!test_and_clear_bit(itag,
+				 &ctx_info->data_active_reqs));
+		} else {
+			clear_bit(CMDQ_STATE_DCMD_ACTIVE,
+					&ctx_info->curr_state);
+		}
+		mmc_cmdq_post_req(host, itag, err);
+		WARN_ON(!test_and_clear_bit(itag,
+					&ctx_info->active_reqs));
+		mmc_host_clk_release(host);
+		mmc_put_card(card);
+	}
+
+	spin_lock_irq(q->queue_lock);
+	blk_queue_invalidate_tags(q);
+	spin_unlock_irq(q->queue_lock);
 }
 
 static void mmc_blk_cmdq_shutdown(struct mmc_queue *mq)
@@ -3335,6 +3433,7 @@
 	struct mmc_queue_req *mq_rq = req->special;
 	struct mmc_request *mrq;
 	struct mmc_cmdq_req *cmdq_req;
+	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
 
 	BUG_ON(!host);
 
@@ -3360,32 +3459,40 @@
 	else
 		mrq->data->error = -ETIMEDOUT;
 
-	BUG_ON(host->err_mrq != NULL);
-	host->err_mrq = mrq;
+	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state) ||
+		test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
+		return BLK_EH_NOT_HANDLED;
 
-	mmc_host_clk_release(mrq->host);
+	set_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
 	return BLK_EH_HANDLED;
 }
 
+/*
+ * mmc_blk_cmdq_err: error handling of cmdq error requests.
+ * Function should be called in context of error out request
+ * which has claim_host and rpm acquired.
+ * This may be called with CQ engine halted. Make sure to
+ * unhalt it after error recovery.
+ *
+ * TODO: Currently cmdq error handler does reset_all in case
+ * of any erorr. Need to optimize error handling.
+ */
 static void mmc_blk_cmdq_err(struct mmc_queue *mq)
 {
-	int err;
-	int retry = 0;
-	int gen_err;
-	u32 status;
-
 	struct mmc_host *host = mq->card->host;
 	struct mmc_request *mrq = host->err_mrq;
-	struct mmc_card *card = mq->card;
 	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
-	struct request_queue *q = mrq->req->q;
-	int tag = mrq->req->tag;
+	struct request_queue *q;
+	int err;
 
-	pm_runtime_get_sync(&card->dev);
 	mmc_host_clk_hold(host);
 	host->cmdq_ops->dumpstate(host);
 	mmc_host_clk_release(host);
 
+	if (WARN_ON(!mrq))
+		return;
+
+	q = mrq->req->q;
 	err = mmc_cmdq_halt(host, true);
 	if (err) {
 		pr_err("halt: failed: %d\n", err);
@@ -3394,74 +3501,44 @@
 
 	/* RED error - Fatal: requires reset */
 	if (mrq->cmdq_req->resp_err) {
+		err = mrq->cmdq_req->resp_err;
 		pr_crit("%s: Response error detected: Device in bad state\n",
 			mmc_hostname(host));
-		blk_end_request_all(mrq->req, -EIO);
 		goto reset;
 	}
 
-	if (mrq->data && mrq->data->error) {
-		blk_end_request_all(mrq->req, mrq->data->error);
-		for (; retry < MAX_RETRIES; retry++) {
-			err = get_card_status(card, &status, 0);
-			if (!err)
-				break;
-		}
+	/*
+	 * In case of software request time-out, we schedule err work only for
+	 * the first error out request and handles all other request in flight
+	 * here.
+	 */
+	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state)) {
+		err = -ETIMEDOUT;
+	} else if (mrq->data && mrq->data->error) {
+		err = mrq->data->error;
+	} else if (mrq->cmd && mrq->cmd->error) {
+		/* DCMD commands */
+		err = mrq->cmd->error;
 
-		if (err) {
-			pr_err("%s: No response from card !!!\n",
-			       mmc_hostname(host));
-			goto reset;
-		}
-
-		if (R1_CURRENT_STATE(status) == R1_STATE_DATA ||
-		    R1_CURRENT_STATE(status) == R1_STATE_RCV) {
-			err =  send_stop(card, MMC_CMDQ_STOP_TIMEOUT_MS,
-					 mrq->req, &gen_err, &status);
-			if (err) {
-				pr_err("%s: error %d sending stop (%d) command\n",
-					mmc_hostname(host),
-					err, status);
-				goto reset;
-			}
-		}
-
-		if (mmc_cmdq_discard_queue(host, tag))
-			goto reset;
-		else
-			goto unhalt;
-	}
-
-	/* DCMD commands */
-	if (mrq->cmd && mrq->cmd->error) {
 		/*
 		 * Notify completion for non flush commands like discard
 		 * that wait for DCMD finish.
 		 */
 		if (!(mrq->req->cmd_flags & REQ_PREFLUSH)) {
 			complete(&mrq->completion);
-			goto reset;
 		}
-		clear_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx_info->curr_state);
-		blk_end_request_all(mrq->req, mrq->cmd->error);
 	}
 
 reset:
-	spin_lock_irq(mq->queue->queue_lock);
-	blk_queue_invalidate_tags(q);
-	spin_unlock_irq(mq->queue->queue_lock);
-	mmc_blk_cmdq_reset(host, true);
-	goto out;
-
-unhalt:
+	mmc_blk_cmdq_reset_all(host, err);
+	if (mrq->cmdq_req->resp_err)
+		mrq->cmdq_req->resp_err = false;
 	mmc_cmdq_halt(host, false);
 
-out:
 	host->err_mrq = NULL;
-	pm_runtime_mark_last_busy(&card->dev);
-	clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
+	clear_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
+	WARN_ON(!test_and_clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
 	wake_up(&ctx_info->wait);
-	__mmc_put_card(card);
 }
 
 /* invoked by block layer in softirq context */
@@ -3481,31 +3558,38 @@
 	else if (mrq->data && mrq->data->error)
 		err = mrq->data->error;
 
-	/* clear pending request */
-	BUG_ON(!test_and_clear_bit(cmdq_req->tag,
-				   &ctx_info->active_reqs));
-
-	if (cmdq_req->cmdq_req_flags & DCMD)
-		is_dcmd = true;
-	else
-		BUG_ON(!test_and_clear_bit(cmdq_req->tag,
-			 &ctx_info->data_active_reqs));
-
-	mmc_cmdq_post_req(host, cmdq_req->tag, err);
-	if (err) {
-		pr_err("%s: %s: txfr error: %d\n", mmc_hostname(mrq->host),
-		       __func__, err);
+	if (err || cmdq_req->resp_err) {
+		pr_err("%s: %s: txfr error(%d)/resp_err(%d)\n",
+				mmc_hostname(mrq->host), __func__, err,
+				cmdq_req->resp_err);
 		if (test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
 			pr_err("%s: CQ in error state, ending current req: %d\n",
 				__func__, err);
-			blk_end_request_all(rq, err);
 		} else {
 			set_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
+			BUG_ON(host->err_mrq != NULL);
+			host->err_mrq = mrq;
 			schedule_work(&mq->cmdq_err_work);
 		}
 		goto out;
 	}
+	/*
+	 * In case of error CMDQ is expected to be either in halted
+	 * or disable state so cannot receive any completion of
+	 * other requests.
+	 */
+	BUG_ON(test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
 
+	/* clear pending request */
+	BUG_ON(!test_and_clear_bit(cmdq_req->tag,
+				   &ctx_info->active_reqs));
+	if (cmdq_req->cmdq_req_flags & DCMD)
+		is_dcmd = true;
+	else
+		BUG_ON(!test_and_clear_bit(cmdq_req->tag,
+					 &ctx_info->data_active_reqs));
+
+	mmc_cmdq_post_req(host, cmdq_req->tag, err);
 	if (cmdq_req->cmdq_req_flags & DCMD) {
 		clear_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx_info->curr_state);
 		blk_end_request_all(rq, err);
@@ -3517,10 +3601,11 @@
 out:
 
 	mmc_cmdq_clk_scaling_stop_busy(host, true, is_dcmd);
-	if (!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
+	if (!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
 		wake_up(&ctx_info->wait);
+		mmc_put_card(host->card);
+	}
 
-	mmc_put_card(host->card);
 	if (!ctx_info->active_reqs)
 		wake_up_interruptible(&host->cmdq_ctx.queue_empty_wq);
 
@@ -3535,7 +3620,6 @@
 {
 	struct request *req = mrq->req;
 
-	mmc_host_clk_release(mrq->host);
 	blk_complete_request(req);
 }
 EXPORT_SYMBOL(mmc_blk_cmdq_req_done);
commit	d7e5be8b22317af74bd53faa4698a718121c748e	[log] [tgz]
author	Ritesh Harjani <riteshh@codeaurora.org>	Wed Oct 07 16:42:47 2015 +0530
committer	Xiaonian Wang <xiaonian@codeaurora.org>	Fri Apr 07 17:31:48 2017 -0700
tree	a067013141ed66138e58bb1d425f1904570c1cfb
parent	c2f6cf0a21b1da8f0cb74cd7dfc5e41cf88e133f [diff] [blame]