block: row: Re-design urgent request notification mechanism

When the ROW scheduler reports to the block layer that an urgent
request is pending, the device driver may decide to stop the
transmission of the current request in order to handle the urgent one,
thus reducing the latency of the urgent request. For example, a long
WRITE may be stopped to handle an urgent READ.
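
For illustration only, a driver-side sketch of this preemption flow.
The xyz_* helpers and scheduler_urgent_pending() are hypothetical
placeholders, not the actual driver or block-layer API;
blk_requeue_request() is the stock block-layer requeue helper:

	static void xyz_issue_rq(struct xyz_host *host, struct request *rq)
	{
		xyz_start_transfer(host, rq);	/* e.g. a long WRITE */

		while (!xyz_transfer_done(host)) {
			if (scheduler_urgent_pending(host->queue)) {
				/* Stop the ongoing transfer... */
				xyz_abort_transfer(host, rq);
				/* ...put the unfinished part back... */
				blk_requeue_request(host->queue, rq);
				/* ...and fetch the urgent READ next. */
				break;
			}
		}
	}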

This patch updates the ROW URGENT notification policy as follows:

- Don't notify URGENT if there is an uncompleted URGENT request in the
  driver.
- After notifying that an URGENT request is present, the next request
  dispatched is the URGENT one.
- At any given moment only one request can be marked as URGENT,
  independent of its location (driver or scheduler).

Other changes to URGENT policy:
- Only READ queues are allowed to notify that an URGENT request is
  pending (see the model sketch below).
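
The single-URGENT invariant behind these rules can be modeled in plain
userspace C. The code below is a standalone, compilable sketch with
illustrative names (pending_urgent and urgent_in_flight mirror
rd->pending_urgent_rq and rd->urgent_in_flight), not scheduler code:

	#include <assert.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct req { bool urgent; };

	static struct req *pending_urgent;	/* rd->pending_urgent_rq */
	static bool urgent_in_flight;		/* rd->urgent_in_flight */

	/* Mark a request URGENT only if no other URGENT exists anywhere. */
	static bool try_mark_urgent(struct req *rq)
	{
		if (pending_urgent || urgent_in_flight)
			return false;
		rq->urgent = true;
		pending_urgent = rq;
		return true;
	}

	/* After notification, the URGENT request is dispatched first. */
	static struct req *dispatch_next(struct req *fifo_head)
	{
		struct req *rq = pending_urgent ? pending_urgent : fifo_head;

		if (rq == pending_urgent) {
			pending_urgent = NULL;
			urgent_in_flight = true;
		}
		return rq;
	}

	static void complete_req(struct req *rq)
	{
		if (rq->urgent) {
			rq->urgent = false;
			urgent_in_flight = false;
		}
	}

	int main(void)
	{
		struct req a = { false }, b = { false };

		assert(try_mark_urgent(&a));		/* a becomes URGENT */
		assert(!try_mark_urgent(&b));		/* only one at a time */
		assert(dispatch_next(&b) == &a);	/* URGENT goes first */
		assert(!try_mark_urgent(&b));		/* blocked: in flight */
		complete_req(&a);
		assert(try_mark_urgent(&b));		/* allowed again */
		return 0;
	}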

CR fix:
If a pending urgent request (A) gets merged with another request (B),
A is removed from the scheduler queue but not from
rd->pending_urgent_rq.
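
Continuing the model above, the fix transfers the URGENT bookkeeping to
the surviving request when the pending urgent one is merged away
(illustrative sketch, not the scheduler code; see row_merged_requests()
in the diff for the real change):

	/* rq (B) absorbs next (A); the URGENT marking must follow rq. */
	static void merged(struct req *rq, struct req *next)
	{
		if (pending_urgent == next) {
			pending_urgent = rq;
			rq->urgent = true;
			next->urgent = false;
		}
	}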

CRs-Fixed: 453712
Change-Id: I321e8cf58e12a05b82edd2a03f52fcce7bc9a900
Signed-off-by: Tatyana Brokhman <tlinder@codeaurora.org>
diff --git a/block/row-iosched.c b/block/row-iosched.c
index bdb6abd..3baec8c 100644
--- a/block/row-iosched.c
+++ b/block/row-iosched.c
@@ -87,7 +87,7 @@
 static const struct row_queue_params row_queues_def[] = {
 /* idling_enabled, quantum, is_urgent */
 	{true, 10, true},	/* ROWQ_PRIO_HIGH_READ */
-	{false, 1, true},	/* ROWQ_PRIO_HIGH_SWRITE */
+	{false, 1, false},	/* ROWQ_PRIO_HIGH_SWRITE */
 	{true, 100, true},	/* ROWQ_PRIO_REG_READ */
 	{false, 1, false},	/* ROWQ_PRIO_REG_SWRITE */
 	{false, 1, false},	/* ROWQ_PRIO_REG_WRITE */
@@ -165,8 +165,11 @@
  * @nr_reqs: nr_reqs[0] holds the number of all READ requests in
  *			scheduler, nr_reqs[1] holds the number of all WRITE
  *			requests in scheduler
- * @nr_urgent_in_flight: number of uncompleted urgent requests
- *			(both reads and writes)
+ * @urgent_in_flight: flag indicating that there is an urgent
+ *			request that was dispatched to the driver and has
+ *			yet to complete.
+ * @pending_urgent_rq:	pointer to the pending urgent request
+ * @last_served_ioprio_class: I/O priority class of the last dispatched request
  * @cycle_flags:	used for marking unserved queueus
  *
  */
@@ -177,8 +180,9 @@
 
 	struct idling_data		rd_idle_data;
 	unsigned int			nr_reqs[2];
-	unsigned int			nr_urgent_in_flight;
-
+	bool				urgent_in_flight;
+	struct request			*pending_urgent_rq;
+	int				last_served_ioprio_class;
 	unsigned int			cycle_flags;
 };
 
@@ -274,6 +278,12 @@
 	rqueue->nr_req++;
 	rq_set_fifo_time(rq, jiffies); /* for statistics*/
 
+	if (rq->cmd_flags & REQ_URGENT) {
+		WARN_ON(1);
+		blk_dump_rq_flags(rq, "");
+		rq->cmd_flags &= ~REQ_URGENT;
+	}
+
 	if (row_queues_def[rqueue->prio].idling_enabled) {
 		if (rd->rd_idle_data.idling_queue_idx == rqueue->prio &&
 		    hrtimer_active(&rd->rd_idle_data.hr_timer)) {
@@ -286,7 +296,8 @@
 		diff_ms = ktime_to_ms(ktime_sub(ktime_get(),
 				rqueue->idle_data.last_insert_time));
 		if (unlikely(diff_ms < 0)) {
-			pr_err("ROW BUG: %s diff_ms < 0", __func__);
+			pr_err("%s(): time delta error: diff_ms < 0",
+				__func__);
 			rqueue->idle_data.begin_idling = false;
 			return;
 		}
@@ -302,11 +313,23 @@
 		rqueue->idle_data.last_insert_time = ktime_get();
 	}
 	if (row_queues_def[rqueue->prio].is_urgent &&
-	    row_rowq_unserved(rd, rqueue->prio)) {
-		row_log_rowq(rd, rqueue->prio,
-			"added urgent request (total on queue=%d)",
-			rqueue->nr_req);
-		rq->cmd_flags |= REQ_URGENT;
+	    !rd->pending_urgent_rq && !rd->urgent_in_flight) {
+		/* Handle High Priority queues */
+		if (rqueue->prio < ROWQ_REG_PRIO_IDX &&
+		    rd->last_served_ioprio_class != IOPRIO_CLASS_RT) {
+			row_log_rowq(rd, rqueue->prio,
+				"added (high prio) urgent request");
+			rq->cmd_flags |= REQ_URGENT;
+			rd->pending_urgent_rq = rq;
+		} else if (row_rowq_unserved(rd, rqueue->prio)) {
+			/* Handle Regular priority queues */
+			row_log_rowq(rd, rqueue->prio,
+				"added urgent request (total on queue=%d)",
+				rqueue->nr_req);
+			rq->cmd_flags |= REQ_URGENT;
+			WARN_ON(rqueue->nr_req > 1);
+			rd->pending_urgent_rq = rq;
+		}
 	} else
 		row_log_rowq(rd, rqueue->prio,
 			"added request (total on queue=%d)", rqueue->nr_req);
@@ -328,20 +351,39 @@
 	struct row_data    *rd = q->elevator->elevator_data;
 	struct row_queue   *rqueue = RQ_ROWQ(rq);
 
-	if (rqueue->prio >= ROWQ_MAX_PRIO) {
-		pr_err("\n\n%s:ROW BUG: row_reinsert_req() rqueue->prio = %d\n",
-			   rq->rq_disk->disk_name, rqueue->prio);
-		blk_dump_rq_flags(rq, "");
+	if (!rqueue || rqueue->prio >= ROWQ_MAX_PRIO)
 		return -EIO;
-	}
 
 	list_add(&rq->queuelist, &rqueue->fifo);
 	rd->nr_reqs[rq_data_dir(rq)]++;
 	rqueue->nr_req++;
 
 	row_log_rowq(rd, rqueue->prio,
-		"request reinserted (total on queue=%d)", rqueue->nr_req);
+		"%s request reinserted (total on queue=%d)",
+		(rq_data_dir(rq) == READ ? "READ" : "WRITE"), rqueue->nr_req);
 
+	if (rq->cmd_flags & REQ_URGENT) {
+		/*
+		 * It's not compliant with the design to re-insert
+		 * urgent requests. We want to be able to track this
+		 * down.
+		 */
+		WARN_ON(1);
+		if (!rd->urgent_in_flight) {
+			pr_err("%s(): no urgent in flight", __func__);
+		} else {
+			rd->urgent_in_flight = false;
+			pr_err("%s(): reinserting URGENT %s req",
+				__func__,
+				(rq_data_dir(rq) == READ ? "READ" : "WRITE"));
+			if (rd->pending_urgent_rq) {
+				pr_err("%s(): urgent rq is pending",
+					__func__);
+				rd->pending_urgent_rq->cmd_flags &= ~REQ_URGENT;
+			}
+			rd->pending_urgent_rq = rq;
+		}
+	}
 	return 0;
 }
 
@@ -350,13 +392,17 @@
 	struct row_data *rd = q->elevator->elevator_data;
 
 	 if (rq->cmd_flags & REQ_URGENT) {
-		if (!rd->nr_urgent_in_flight) {
-			pr_err("ROW BUG: %s() nr_urgent_in_flight = 0",
+		if (!rd->urgent_in_flight) {
+			WARN_ON(1);
+			pr_err("%s(): URGENT req but urgent_in_flight = F",
 				__func__);
-			return;
 		}
-		rd->nr_urgent_in_flight--;
+		rd->urgent_in_flight = false;
 	}
+	row_log(q, "completed %s %s req.",
+		(rq->cmd_flags & REQ_URGENT ? "URGENT" : "regular"),
+		(rq_data_dir(rq) == READ ? "READ" : "WRITE"));
+	rq->cmd_flags &= ~REQ_URGENT;
 }
 
 /**
@@ -367,28 +413,19 @@
 static bool row_urgent_pending(struct request_queue *q)
 {
 	struct row_data *rd = q->elevator->elevator_data;
-	int i;
 
-	if (rd->nr_urgent_in_flight) {
-		row_log(rd->dispatch_queue, "%d urgent requests in flight",
-			rd->nr_urgent_in_flight);
+	if (rd->urgent_in_flight) {
+		row_log(rd->dispatch_queue,
+			"urgent request in flight");
 		return false;
 	}
 
-	for (i = ROWQ_HIGH_PRIO_IDX; i < ROWQ_REG_PRIO_IDX; i++)
-		if (!list_empty(&rd->row_queues[i].fifo)) {
-			row_log_rowq(rd, i,
-				"Urgent (high prio) request pending");
-			return true;
-		}
+	if (rd->pending_urgent_rq) {
+		row_log(rd->dispatch_queue, "Urgent request pending");
+		return true;
+	}
 
-	for (i = ROWQ_REG_PRIO_IDX; i < ROWQ_MAX_PRIO; i++)
-		if (row_queues_def[i].is_urgent && row_rowq_unserved(rd, i) &&
-		    !list_empty(&rd->row_queues[i].fifo)) {
-			row_log_rowq(rd, i, "Urgent request pending");
-			return true;
-		}
-
+	row_log(rd->dispatch_queue, "no urgent request pending/in flight");
 	return false;
 }
 
@@ -398,13 +435,16 @@
  * @rq:	request to remove
  *
  */
-static void row_remove_request(struct request_queue *q,
+static void row_remove_request(struct row_data *rd,
 			       struct request *rq)
 {
-	struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
 	struct row_queue *rqueue = RQ_ROWQ(rq);
 
-	rq_fifo_clear(rq);
+	list_del_init(&rq->queuelist);
+	if (rd->pending_urgent_rq == rq)
+		rd->pending_urgent_rq = NULL;
+	else
+		BUG_ON(rq->cmd_flags & REQ_URGENT);
 	rqueue->nr_req--;
 	rd->nr_reqs[rq_data_dir(rq)]--;
 }
@@ -412,25 +452,32 @@
 /*
  * row_dispatch_insert() - move request to dispatch queue
  * @rd:		pointer to struct row_data
- * @queue_idx:	index of the row_queue to dispatch from
+ * @rq:		the request to dispatch
  *
- * This function moves the next request to dispatch from
- * the given queue (row_queues[queue_idx]) to the dispatch queue
+ * This function moves the given request to the dispatch queue
  *
  */
-static void row_dispatch_insert(struct row_data *rd, int queue_idx)
+static void row_dispatch_insert(struct row_data *rd, struct request *rq)
 {
-	struct request *rq;
+	struct row_queue *rqueue = RQ_ROWQ(rq);
 
-	rq = rq_entry_fifo(rd->row_queues[queue_idx].fifo.next);
-	row_remove_request(rd->dispatch_queue, rq);
-	elv_dispatch_add_tail(rd->dispatch_queue, rq);
-	rd->row_queues[queue_idx].nr_dispatched++;
-	row_clear_rowq_unserved(rd, queue_idx);
-	row_log_rowq(rd, queue_idx, " Dispatched request nr_disp = %d",
-		     rd->row_queues[queue_idx].nr_dispatched);
-	if (rq->cmd_flags & REQ_URGENT)
-		rd->nr_urgent_in_flight++;
+	row_remove_request(rd, rq);
+	elv_dispatch_sort(rd->dispatch_queue, rq);
+	if (rq->cmd_flags & REQ_URGENT) {
+		WARN_ON(rd->urgent_in_flight);
+		rd->urgent_in_flight = true;
+	}
+	rqueue->nr_dispatched++;
+	row_clear_rowq_unserved(rd, rqueue->prio);
+	row_log_rowq(rd, rqueue->prio,
+		" Dispatched request %p nr_disp = %d", rq,
+		rqueue->nr_dispatched);
+	if (rqueue->prio < ROWQ_REG_PRIO_IDX)
+		rd->last_served_ioprio_class = IOPRIO_CLASS_RT;
+	else if (rqueue->prio < ROWQ_LOW_PRIO_IDX)
+		rd->last_served_ioprio_class = IOPRIO_CLASS_BE;
+	else
+		rd->last_served_ioprio_class = IOPRIO_CLASS_IDLE;
 }
 
 /*
@@ -595,12 +642,20 @@
 		rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
 	}
 
+	if (rd->pending_urgent_rq) {
+		row_log(rd->dispatch_queue, "dispatching urgent request");
+		row_dispatch_insert(rd, rd->pending_urgent_rq);
+		ret = 1;
+		goto done;
+	}
+
 	ioprio_class_to_serve = row_get_ioprio_class_to_serve(rd, force);
 	row_log(rd->dispatch_queue, "Dispatching from %d priority class",
 		ioprio_class_to_serve);
 
 	switch (ioprio_class_to_serve) {
 	case IOPRIO_CLASS_NONE:
+		rd->last_served_ioprio_class = IOPRIO_CLASS_NONE;
 		goto done;
 	case IOPRIO_CLASS_RT:
 		start_idx = ROWQ_HIGH_PRIO_IDX;
@@ -623,7 +678,8 @@
 
 	/* Dispatch */
 	if (currq >= 0) {
-		row_dispatch_insert(rd, currq);
+		row_dispatch_insert(rd,
+			rq_entry_fifo(rd->row_queues[currq].fifo.next));
 		ret = 1;
 	}
 done:
@@ -672,7 +728,7 @@
 	rdata->rd_idle_data.hr_timer.function = &row_idle_hrtimer_fn;
 
 	INIT_WORK(&rdata->rd_idle_data.idle_work, kick_queue);
-
+	rdata->last_served_ioprio_class = IOPRIO_CLASS_NONE;
 	rdata->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
 	rdata->dispatch_queue = q;
 
@@ -692,7 +748,7 @@
 	for (i = 0; i < ROWQ_MAX_PRIO; i++)
 		BUG_ON(!list_empty(&rd->row_queues[i].fifo));
 	if (hrtimer_cancel(&rd->rd_idle_data.hr_timer))
-		pr_err("ROW BUG: idle timer was active!");
+		pr_err("%s(): idle timer was active!", __func__);
 	rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
 	kfree(rd);
 }
@@ -710,7 +766,13 @@
 
 	list_del_init(&next->queuelist);
 	rqueue->nr_req--;
-
+	if (rqueue->rdata->pending_urgent_rq == next) {
+		pr_err("%s(): merging pending urgent request", __func__);
+		rqueue->rdata->pending_urgent_rq = rq;
+		rq->cmd_flags |= REQ_URGENT;
+		WARN_ON(!(next->cmd_flags & REQ_URGENT));
+		next->cmd_flags &= ~REQ_URGENT;
+	}
 	rqueue->rdata->nr_reqs[rq_data_dir(rq)]--;
 }
 
@@ -722,7 +784,8 @@
  * dispatched from later on)
  *
  */
-static enum row_queue_prio row_get_queue_prio(struct request *rq)
+static enum row_queue_prio row_get_queue_prio(struct request *rq,
+				struct row_data *rd)
 {
 	const int data_dir = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
@@ -740,7 +803,6 @@
 				rq->rq_disk->disk_name, __func__);
 			q_type = ROWQ_PRIO_REG_WRITE;
 		}
-		rq->cmd_flags |= REQ_URGENT;
 		break;
 	case IOPRIO_CLASS_IDLE:
 		if (data_dir == READ)
@@ -783,7 +845,7 @@
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	rq->elv.priv[0] =
-		(void *)(&rd->row_queues[row_get_queue_prio(rq)]);
+		(void *)(&rd->row_queues[row_get_queue_prio(rq, rd)]);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	return 0;