| /* |
| * ROW (Read Over Write) I/O scheduler. |
| * |
| * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 and |
| * only version 2 as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
| /* See Documentation/block/row-iosched.txt */ |
| |
| #include <linux/kernel.h> |
| #include <linux/fs.h> |
| #include <linux/blkdev.h> |
| #include <linux/elevator.h> |
| #include <linux/bio.h> |
| #include <linux/module.h> |
| #include <linux/slab.h> |
| #include <linux/init.h> |
| #include <linux/compiler.h> |
| #include <linux/blktrace_api.h> |
| #include <linux/hrtimer.h> |
| |
/*
 * enum row_queue_prio - Priorities of the ROW queues
 *
 * This enum defines the priorities (and the number of queues)
 * the requests will be distributed to. Each enumerator is also the
 * index of the matching queue in row_queues_def[] and in
 * row_data.row_queues[]; ROWQ_MAX_PRIO is the number of queues and is
 * not a queue of its own.
 * A numerically lower value means a higher priority, so
 * ROWQ_PRIO_HIGH_READ is the highest priority queue.
 *
 */
enum row_queue_prio {
	ROWQ_PRIO_HIGH_READ = 0,
	ROWQ_PRIO_HIGH_SWRITE,
	ROWQ_PRIO_REG_READ,
	ROWQ_PRIO_REG_SWRITE,
	ROWQ_PRIO_REG_WRITE,
	ROWQ_PRIO_LOW_READ,
	ROWQ_PRIO_LOW_SWRITE,
	ROWQ_MAX_PRIO,
};

/*
 * The following indexes define the distribution of ROW queues according to
 * priorities. Each index defines the first queue in that priority group;
 * a group ends where the next one starts (the LOW group ends at
 * ROWQ_MAX_PRIO).
 */
#define ROWQ_HIGH_PRIO_IDX	ROWQ_PRIO_HIGH_READ
#define ROWQ_REG_PRIO_IDX	ROWQ_PRIO_REG_READ
#define ROWQ_LOW_PRIO_IDX	ROWQ_PRIO_LOW_READ
| |
| /** |
| * struct row_queue_params - ROW queue parameters |
| * @idling_enabled: Flag indicating whether idling is enable on |
| * the queue |
| * @quantum: Number of requests to be dispatched from this queue |
| * in a dispatch cycle |
| * @is_urgent: Flags indicating whether the queue can notify on |
| * urgent requests |
| * |
| */ |
| struct row_queue_params { |
| bool idling_enabled; |
| int quantum; |
| bool is_urgent; |
| }; |
| |
| /* |
| * This array holds the default values of the different configurables |
| * for each ROW queue. Each row of the array holds the following values: |
| * {idling_enabled, quantum, is_urgent} |
| * Each row corresponds to a queue with the same index (according to |
| * enum row_queue_prio) |
| * Note: The quantums are valid inside their priority type. For example: |
| * For every 10 high priority read requests, 1 high priority sync |
| * write will be dispatched. |
| * For every 100 regular read requests 1 regular write request will |
| * be dispatched. |
| */ |
| static const struct row_queue_params row_queues_def[] = { |
| /* idling_enabled, quantum, is_urgent */ |
| {true, 10, true}, /* ROWQ_PRIO_HIGH_READ */ |
| {false, 1, false}, /* ROWQ_PRIO_HIGH_SWRITE */ |
| {true, 100, true}, /* ROWQ_PRIO_REG_READ */ |
| {false, 1, false}, /* ROWQ_PRIO_REG_SWRITE */ |
| {false, 1, false}, /* ROWQ_PRIO_REG_WRITE */ |
| {false, 1, false}, /* ROWQ_PRIO_LOW_READ */ |
| {false, 1, false} /* ROWQ_PRIO_LOW_SWRITE */ |
| }; |
| |
/*
 * Default values for idling on read queues (in msec).
 * ROW_IDLE_TIME_MSEC is the default idling duration and
 * ROW_READ_FREQ_MSEC the default maximal gap between two insertions that
 * still triggers idling.
 */
#define ROW_IDLE_TIME_MSEC 5
#define ROW_READ_FREQ_MSEC 5

/**
 * struct rowq_idling_data - parameters for idling on the queue
 * @last_insert_time:	time the last request was inserted
 *			to the queue
 * @begin_idling:	flag indicating whether we should idle
 *
 */
struct rowq_idling_data {
	ktime_t last_insert_time;
	bool begin_idling;
};
| |
/**
 * struct row_queue - requests grouping structure
 * @rdata:		parent row_data structure
 * @fifo:		fifo of requests
 * @prio:		queue priority (enum row_queue_prio)
 * @nr_dispatched:	number of requests already dispatched in
 *			the current dispatch cycle
 * @nr_req:		number of requests in queue
 * @disp_quantum:	number of requests this queue may
 *			dispatch in a dispatch cycle
 * @idle_data:		data for idling on queues
 *
 */
struct row_queue {
	struct row_data *rdata;
	struct list_head fifo;
	enum row_queue_prio prio;

	unsigned int nr_dispatched;

	unsigned int nr_req;
	int disp_quantum;

	/* used only for READ queues */
	struct rowq_idling_data idle_data;
};
| |
/**
 * struct idling_data - data for idling on empty rqueue
 * @idle_time_ms:	idling duration (msec)
 * @freq_ms:		min time between two requests that
 *			trigger idling (msec)
 * @hr_timer:		idling timer
 * @idle_work:		the work to be scheduled when idling timer expires
 * @idling_queue_idx:	index of the queue we're idling on; set to
 *			ROWQ_MAX_PRIO when no idling is in progress
 *
 */
struct idling_data {
	s64 idle_time_ms;
	s64 freq_ms;

	struct hrtimer hr_timer;
	struct work_struct idle_work;
	enum row_queue_prio idling_queue_idx;
};
| |
/**
 * struct starvation_data - data for starvation management
 * @starvation_limit:	number of times this priority class
 *			can tolerate being starved
 * @starvation_counter:	number of requests from higher
 *			priority classes that were dispatched while
 *			requests of this priority were pending
 *
 */
struct starvation_data {
	int starvation_limit;
	int starvation_counter;
};
| |
/**
 * struct row_data - Per block device rqueue structure
 * @dispatch_queue:	dispatch rqueue
 * @row_queues:		array of priority request queues, indexed by
 *			enum row_queue_prio
 * @rd_idle_data:	data for idling after READ request
 * @nr_reqs:		nr_reqs[READ] holds the number of all READ requests
 *			in scheduler, nr_reqs[WRITE] holds the number of all
 *			WRITE requests in scheduler
 * @urgent_in_flight:	flag indicating that there is an urgent
 *			request that was dispatched to driver and is yet to
 *			complete.
 * @pending_urgent_rq:	pointer to the pending urgent request
 * @last_served_ioprio_class: I/O priority class that was last dispatched from
 * @reg_prio_starvation: starvation data for REGULAR priority queues
 * @low_prio_starvation: starvation data for LOW priority queues
 * @cycle_flags:	bitmask, one bit per queue (bit number is the queue
 *			index), used for marking unserved queues
 *
 */
struct row_data {
	struct request_queue *dispatch_queue;

	struct row_queue row_queues[ROWQ_MAX_PRIO];

	struct idling_data rd_idle_data;
	unsigned int nr_reqs[2];
	bool urgent_in_flight;
	struct request *pending_urgent_rq;
	int last_served_ioprio_class;

	/* Default starvation_limit of the REGULAR priority class */
#define	ROW_REG_STARVATION_TOLLERANCE	5000
	struct starvation_data reg_prio_starvation;
	/* Default starvation_limit of the LOW priority class */
#define	ROW_LOW_STARVATION_TOLLERANCE	10000
	struct starvation_data low_prio_starvation;

	unsigned int cycle_flags;
};
| |
/* ROW queue a request was assigned to, as stored in its elevator data */
#define RQ_ROWQ(rq) ((struct row_queue *) ((rq)->elv.priv[0]))

/*
 * Tracing helpers. Macro arguments that are dereferenced or forwarded are
 * parenthesized so that any pointer expression can be passed safely.
 *
 * row_log()      - emit a blktrace message prefixed with the caller's name
 * row_log_rowq() - emit a blktrace message prefixed with the ROW queue id
 */
#define row_log(q, fmt, args...)   \
	blk_add_trace_msg((q), "%s():" fmt , __func__, ##args)
#define row_log_rowq(rdata, rowq_id, fmt, args...)		\
	blk_add_trace_msg((rdata)->dispatch_queue, "rowq%d " fmt, \
		rowq_id, ##args)
| |
| static inline void row_mark_rowq_unserved(struct row_data *rd, |
| enum row_queue_prio qnum) |
| { |
| rd->cycle_flags |= (1 << qnum); |
| } |
| |
| static inline void row_clear_rowq_unserved(struct row_data *rd, |
| enum row_queue_prio qnum) |
| { |
| rd->cycle_flags &= ~(1 << qnum); |
| } |
| |
| static inline int row_rowq_unserved(struct row_data *rd, |
| enum row_queue_prio qnum) |
| { |
| return rd->cycle_flags & (1 << qnum); |
| } |
| |
| static inline void __maybe_unused row_dump_queues_stat(struct row_data *rd) |
| { |
| int i; |
| |
| row_log(rd->dispatch_queue, " Queues status:"); |
| for (i = 0; i < ROWQ_MAX_PRIO; i++) |
| row_log(rd->dispatch_queue, |
| "queue%d: dispatched= %d, nr_req=%d", i, |
| rd->row_queues[i].nr_dispatched, |
| rd->row_queues[i].nr_req); |
| } |
| |
| /******************** Static helper functions ***********************/ |
| static void kick_queue(struct work_struct *work) |
| { |
| struct idling_data *read_data = |
| container_of(work, struct idling_data, idle_work); |
| struct row_data *rd = |
| container_of(read_data, struct row_data, rd_idle_data); |
| |
| blk_run_queue(rd->dispatch_queue); |
| } |
| |
| |
| static enum hrtimer_restart row_idle_hrtimer_fn(struct hrtimer *hr_timer) |
| { |
| struct idling_data *read_data = |
| container_of(hr_timer, struct idling_data, hr_timer); |
| struct row_data *rd = |
| container_of(read_data, struct row_data, rd_idle_data); |
| |
| row_log_rowq(rd, rd->rd_idle_data.idling_queue_idx, |
| "Performing delayed work"); |
| /* Mark idling process as done */ |
| rd->row_queues[rd->rd_idle_data.idling_queue_idx]. |
| idle_data.begin_idling = false; |
| rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO; |
| |
| if (!rd->nr_reqs[READ] && !rd->nr_reqs[WRITE]) |
| row_log(rd->dispatch_queue, "No requests in scheduler"); |
| else |
| kblockd_schedule_work(rd->dispatch_queue, |
| &read_data->idle_work); |
| return HRTIMER_NORESTART; |
| } |
| |
| /* |
| * row_regular_req_pending() - Check if there are REGULAR priority requests |
| * Pending in scheduler |
| * @rd: pointer to struct row_data |
| * |
| * Returns True if there are REGULAR priority requests in scheduler queues. |
| * False, otherwise. |
| */ |
| static inline bool row_regular_req_pending(struct row_data *rd) |
| { |
| int i; |
| |
| for (i = ROWQ_REG_PRIO_IDX; i < ROWQ_LOW_PRIO_IDX; i++) |
| if (!list_empty(&rd->row_queues[i].fifo)) |
| return true; |
| return false; |
| } |
| |
| /* |
| * row_low_req_pending() - Check if there are LOW priority requests |
| * Pending in scheduler |
| * @rd: pointer to struct row_data |
| * |
| * Returns True if there are LOW priority requests in scheduler queues. |
| * False, otherwise. |
| */ |
| static inline bool row_low_req_pending(struct row_data *rd) |
| { |
| int i; |
| |
| for (i = ROWQ_LOW_PRIO_IDX; i < ROWQ_MAX_PRIO; i++) |
| if (!list_empty(&rd->row_queues[i].fifo)) |
| return true; |
| return false; |
| } |
| |
| /******************* Elevator callback functions *********************/ |
| |
/*
 * row_add_request() - Add request to the scheduler
 * @q:	requests queue
 * @rq:	request to add
 *
 * Appends @rq to the tail of the ROW queue it was assigned to and updates
 * the request counters. For queues with idling enabled it then decides
 * whether idling should begin on the queue, and for queues that may raise
 * urgent requests it decides whether @rq becomes the pending urgent request.
 */
static void row_add_request(struct request_queue *q,
			struct request *rq)
{
	struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
	struct row_queue *rqueue = RQ_ROWQ(rq);
	s64 diff_ms;
	/* Sampled before @rq is linked into the fifo below */
	bool queue_was_empty = list_empty(&rqueue->fifo);
	unsigned long bv_page_flags = 0;

	/* Page flags of the first bio vector, used for the readahead check */
	if (rq->bio && rq->bio->bi_io_vec && rq->bio->bi_io_vec->bv_page)
		bv_page_flags = rq->bio->bi_io_vec->bv_page->flags;

	list_add_tail(&rq->queuelist, &rqueue->fifo);
	rd->nr_reqs[rq_data_dir(rq)]++;
	rqueue->nr_req++;
	rq_set_fifo_time(rq, jiffies); /* for statistics*/

	/* A request is not expected to arrive already marked as urgent */
	if (rq->cmd_flags & REQ_URGENT) {
		WARN_ON(1);
		blk_dump_rq_flags(rq, "");
		rq->cmd_flags &= ~REQ_URGENT;
	}

	if (row_queues_def[rqueue->prio].idling_enabled) {
		/*
		 * The request we were idling for has arrived: stop the
		 * idling timer. A negative return value means the timer
		 * could not be cancelled, so the bookkeeping is left alone.
		 */
		if (rd->rd_idle_data.idling_queue_idx == rqueue->prio &&
		    hrtimer_active(&rd->rd_idle_data.hr_timer)) {
			if (hrtimer_try_to_cancel(
				&rd->rd_idle_data.hr_timer) >= 0) {
				row_log_rowq(rd, rqueue->prio,
				    "Canceled delayed work on %d",
				    rd->rd_idle_data.idling_queue_idx);
				rd->rd_idle_data.idling_queue_idx =
					ROWQ_MAX_PRIO;
			}
		}
		/* Time elapsed since the previous insertion to this queue */
		diff_ms = ktime_to_ms(ktime_sub(ktime_get(),
				rqueue->idle_data.last_insert_time));
		if (unlikely(diff_ms < 0)) {
			pr_err("%s(): time delta error: diff_ms < 0",
				__func__);
			rqueue->idle_data.begin_idling = false;
			/*
			 * NOTE: the request stays queued, but
			 * last_insert_time is not refreshed and the urgent
			 * handling below is skipped.
			 */
			return;
		}

		/*
		 * Idle if the request is part of a readahead or if requests
		 * are arriving closer together than freq_ms.
		 */
		if ((bv_page_flags & (1L << PG_readahead)) ||
		    (diff_ms < rd->rd_idle_data.freq_ms)) {
			rqueue->idle_data.begin_idling = true;
			row_log_rowq(rd, rqueue->prio, "Enable idling");
		} else {
			rqueue->idle_data.begin_idling = false;
			row_log_rowq(rd, rqueue->prio, "Disable idling (%ldms)",
				(long)diff_ms);
		}

		rqueue->idle_data.last_insert_time = ktime_get();
	}
	/* Only one urgent request may be pending or in flight at a time */
	if (row_queues_def[rqueue->prio].is_urgent &&
	    !rd->pending_urgent_rq && !rd->urgent_in_flight) {
		/* Handle High Priority queues */
		if (rqueue->prio < ROWQ_REG_PRIO_IDX &&
		    rd->last_served_ioprio_class != IOPRIO_CLASS_RT &&
		    queue_was_empty) {
			row_log_rowq(rd, rqueue->prio,
				"added (high prio) urgent request");
			rq->cmd_flags |= REQ_URGENT;
			rd->pending_urgent_rq = rq;
		} else if (row_rowq_unserved(rd, rqueue->prio)) {
			/* Handle Regular priority queues */
			row_log_rowq(rd, rqueue->prio,
			    "added urgent request (total on queue=%d)",
			    rqueue->nr_req);
			rq->cmd_flags |= REQ_URGENT;
			WARN_ON(rqueue->nr_req > 1);
			rd->pending_urgent_rq = rq;
		}
	} else
		row_log_rowq(rd, rqueue->prio,
			"added request (total on queue=%d)", rqueue->nr_req);
}
| |
| /** |
| * row_reinsert_req() - Reinsert request back to the scheduler |
| * @q: requests queue |
| * @rq: request to add |
| * |
| * Reinsert the given request back to the queue it was |
| * dispatched from as if it was never dispatched. |
| * |
| * Returns 0 on success, error code otherwise |
| */ |
| static int row_reinsert_req(struct request_queue *q, |
| struct request *rq) |
| { |
| struct row_data *rd = q->elevator->elevator_data; |
| struct row_queue *rqueue = RQ_ROWQ(rq); |
| |
| if (!rqueue || rqueue->prio >= ROWQ_MAX_PRIO) |
| return -EIO; |
| |
| list_add(&rq->queuelist, &rqueue->fifo); |
| rd->nr_reqs[rq_data_dir(rq)]++; |
| rqueue->nr_req++; |
| |
| row_log_rowq(rd, rqueue->prio, |
| "%s request reinserted (total on queue=%d)", |
| (rq_data_dir(rq) == READ ? "READ" : "write"), rqueue->nr_req); |
| |
| if (rq->cmd_flags & REQ_URGENT) { |
| /* |
| * It's not compliant with the design to re-insert |
| * urgent requests. We want to be able to track this |
| * down. |
| */ |
| WARN_ON(1); |
| if (!rd->urgent_in_flight) { |
| pr_err("%s(): no urgent in flight", __func__); |
| } else { |
| rd->urgent_in_flight = false; |
| pr_err("%s(): reinserting URGENT %s req", |
| __func__, |
| (rq_data_dir(rq) == READ ? "READ" : "WRITE")); |
| if (rd->pending_urgent_rq) { |
| pr_err("%s(): urgent rq is pending", |
| __func__); |
| rd->pending_urgent_rq->cmd_flags &= ~REQ_URGENT; |
| } |
| rd->pending_urgent_rq = rq; |
| } |
| } |
| return 0; |
| } |
| |
| static void row_completed_req(struct request_queue *q, struct request *rq) |
| { |
| struct row_data *rd = q->elevator->elevator_data; |
| |
| if (rq->cmd_flags & REQ_URGENT) { |
| if (!rd->urgent_in_flight) { |
| WARN_ON(1); |
| pr_err("%s(): URGENT req but urgent_in_flight = F", |
| __func__); |
| } |
| rd->urgent_in_flight = false; |
| rq->cmd_flags &= ~REQ_URGENT; |
| } |
| row_log(q, "completed %s %s req.", |
| (rq->cmd_flags & REQ_URGENT ? "URGENT" : "regular"), |
| (rq_data_dir(rq) == READ ? "READ" : "WRITE")); |
| } |
| |
| /** |
| * row_urgent_pending() - Return TRUE if there is an urgent |
| * request on scheduler |
| * @q: requests queue |
| */ |
| static bool row_urgent_pending(struct request_queue *q) |
| { |
| struct row_data *rd = q->elevator->elevator_data; |
| |
| if (rd->urgent_in_flight) { |
| row_log(rd->dispatch_queue, "%d urgent requests in flight", |
| rd->urgent_in_flight); |
| return false; |
| } |
| |
| if (rd->pending_urgent_rq) { |
| row_log(rd->dispatch_queue, "Urgent request pending"); |
| return true; |
| } |
| |
| row_log(rd->dispatch_queue, "no urgent request pending/in flight"); |
| return false; |
| } |
| |
| /** |
| * row_remove_request() - Remove given request from scheduler |
| * @q: requests queue |
| * @rq: request to remove |
| * |
| */ |
| static void row_remove_request(struct row_data *rd, |
| struct request *rq) |
| { |
| struct row_queue *rqueue = RQ_ROWQ(rq); |
| |
| list_del_init(&(rq)->queuelist); |
| if (rd->pending_urgent_rq == rq) |
| rd->pending_urgent_rq = NULL; |
| else |
| BUG_ON(rq->cmd_flags & REQ_URGENT); |
| rqueue->nr_req--; |
| rd->nr_reqs[rq_data_dir(rq)]--; |
| } |
| |
| /* |
| * row_dispatch_insert() - move request to dispatch queue |
| * @rd: pointer to struct row_data |
| * @rq: the request to dispatch |
| * |
| * This function moves the given request to the dispatch queue |
| * |
| */ |
| static void row_dispatch_insert(struct row_data *rd, struct request *rq) |
| { |
| struct row_queue *rqueue = RQ_ROWQ(rq); |
| |
| row_remove_request(rd, rq); |
| elv_dispatch_sort(rd->dispatch_queue, rq); |
| if (rq->cmd_flags & REQ_URGENT) { |
| WARN_ON(rd->urgent_in_flight); |
| rd->urgent_in_flight = true; |
| } |
| rqueue->nr_dispatched++; |
| row_clear_rowq_unserved(rd, rqueue->prio); |
| row_log_rowq(rd, rqueue->prio, |
| " Dispatched request %p nr_disp = %d", rq, |
| rqueue->nr_dispatched); |
| if (rqueue->prio < ROWQ_REG_PRIO_IDX) { |
| rd->last_served_ioprio_class = IOPRIO_CLASS_RT; |
| if (row_regular_req_pending(rd)) |
| rd->reg_prio_starvation.starvation_counter++; |
| if (row_low_req_pending(rd)) |
| rd->low_prio_starvation.starvation_counter++; |
| } else if (rqueue->prio < ROWQ_LOW_PRIO_IDX) { |
| rd->last_served_ioprio_class = IOPRIO_CLASS_BE; |
| rd->reg_prio_starvation.starvation_counter = 0; |
| if (row_low_req_pending(rd)) |
| rd->low_prio_starvation.starvation_counter++; |
| } else { |
| rd->last_served_ioprio_class = IOPRIO_CLASS_IDLE; |
| rd->low_prio_starvation.starvation_counter = 0; |
| } |
| } |
| |
/*
 * row_get_ioprio_class_to_serve() - Return the next I/O priority
 *				class to dispatch requests from
 * @rd:		pointer to struct row_data
 * @force:	flag indicating if forced dispatch
 *
 * This function returns the next I/O priority class to serve
 * {IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE}.
 * If there are no more requests in scheduler or if we're idling on some queue
 * IOPRIO_CLASS_NONE will be returned.
 * If idling is scheduled on a lower priority queue than the one that needs
 * to be served, it will be canceled.
 *
 * A lower priority class is returned instead of a higher one when it has
 * pending requests and its starvation counter has reached its limit.
 *
 */
static int row_get_ioprio_class_to_serve(struct row_data *rd, int force)
{
	int i;
	int ret = IOPRIO_CLASS_NONE;

	/* Nothing queued: the only thing left to decide is whether to idle */
	if (!rd->nr_reqs[READ] && !rd->nr_reqs[WRITE]) {
		row_log(rd->dispatch_queue, "No more requests in scheduler");
		goto check_idling;
	}

	/* First, go over the high priority queues */
	for (i = 0; i < ROWQ_REG_PRIO_IDX; i++) {
		if (!list_empty(&rd->row_queues[i].fifo)) {
			/* High priority work overrides any idling */
			if (hrtimer_active(&rd->rd_idle_data.hr_timer)) {
				if (hrtimer_try_to_cancel(
					&rd->rd_idle_data.hr_timer) >= 0) {
					row_log(rd->dispatch_queue,
					"Canceling delayed work on %d. RT pending",
					     rd->rd_idle_data.idling_queue_idx);
					rd->rd_idle_data.idling_queue_idx =
						ROWQ_MAX_PRIO;
				}
			}

			/* Serve a starved lower class before the RT class */
			if (row_regular_req_pending(rd) &&
			    (rd->reg_prio_starvation.starvation_counter >=
			     rd->reg_prio_starvation.starvation_limit))
				ret = IOPRIO_CLASS_BE;
			else if (row_low_req_pending(rd) &&
			    (rd->low_prio_starvation.starvation_counter >=
			     rd->low_prio_starvation.starvation_limit))
				ret = IOPRIO_CLASS_IDLE;
			else
				ret = IOPRIO_CLASS_RT;

			goto done;
		}
	}

	/*
	 * At the moment idling is implemented only for READ queues.
	 * If enabled on WRITE, this needs updating
	 */
	if (hrtimer_active(&rd->rd_idle_data.hr_timer)) {
		/* ret is still IOPRIO_CLASS_NONE here */
		row_log(rd->dispatch_queue, "Delayed work pending. Exiting");
		goto done;
	}
check_idling:
	/* Check for (high priority) idling and enable if needed */
	for (i = 0; i < ROWQ_REG_PRIO_IDX && !force; i++) {
		if (rd->row_queues[i].idle_data.begin_idling &&
		    row_queues_def[i].idling_enabled)
			goto initiate_idling;
	}

	/* Regular priority queues */
	for (i = ROWQ_REG_PRIO_IDX; i < ROWQ_LOW_PRIO_IDX; i++) {
		if (list_empty(&rd->row_queues[i].fifo)) {
			/* We can idle only if this is not a forced dispatch */
			if (rd->row_queues[i].idle_data.begin_idling &&
			    !force && row_queues_def[i].idling_enabled)
				goto initiate_idling;
		} else {
			if (row_low_req_pending(rd) &&
			    (rd->low_prio_starvation.starvation_counter >=
			     rd->low_prio_starvation.starvation_limit))
				ret = IOPRIO_CLASS_IDLE;
			else
				ret = IOPRIO_CLASS_BE;
			goto done;
		}
	}

	/* Whatever is still queued belongs to the low priority group */
	if (rd->nr_reqs[READ] || rd->nr_reqs[WRITE])
		ret = IOPRIO_CLASS_IDLE;
	goto done;

initiate_idling:
	/*
	 * Reached with i holding the index of the queue to idle on and ret
	 * still IOPRIO_CLASS_NONE, so nothing is dispatched while idling.
	 */
	hrtimer_start(&rd->rd_idle_data.hr_timer,
		ktime_set(0, rd->rd_idle_data.idle_time_ms * NSEC_PER_MSEC),
		HRTIMER_MODE_REL);

	rd->rd_idle_data.idling_queue_idx = i;
	row_log_rowq(rd, i, "Scheduled delayed work on %d. exiting", i);

done:
	return ret;
}
| |
| static void row_restart_cycle(struct row_data *rd, |
| int start_idx, int end_idx) |
| { |
| int i; |
| |
| row_dump_queues_stat(rd); |
| for (i = start_idx; i < end_idx; i++) { |
| if (rd->row_queues[i].nr_dispatched < |
| rd->row_queues[i].disp_quantum) |
| row_mark_rowq_unserved(rd, i); |
| rd->row_queues[i].nr_dispatched = 0; |
| } |
| row_log(rd->dispatch_queue, "Restarting cycle for class @ %d-%d", |
| start_idx, end_idx); |
| } |
| |
| /* |
| * row_get_next_queue() - selects the next queue to dispatch from |
| * @q: requests queue |
| * @rd: pointer to struct row_data |
| * @start_idx/end_idx: indexes in the row_queues array to select a queue |
| * from. |
| * |
| * Return index of the queues to dispatch from. Error code if fails. |
| * |
| */ |
| static int row_get_next_queue(struct request_queue *q, struct row_data *rd, |
| int start_idx, int end_idx) |
| { |
| int i = start_idx; |
| bool restart = true; |
| int ret = -EIO; |
| |
| do { |
| if (list_empty(&rd->row_queues[i].fifo) || |
| rd->row_queues[i].nr_dispatched >= |
| rd->row_queues[i].disp_quantum) { |
| i++; |
| if (i == end_idx && restart) { |
| /* Restart cycle for this priority class */ |
| row_restart_cycle(rd, start_idx, end_idx); |
| i = start_idx; |
| restart = false; |
| } |
| } else { |
| ret = i; |
| break; |
| } |
| } while (i < end_idx); |
| |
| return ret; |
| } |
| |
| /* |
| * row_dispatch_requests() - selects the next request to dispatch |
| * @q: requests queue |
| * @force: flag indicating if forced dispatch |
| * |
| * Return 0 if no requests were moved to the dispatch queue. |
| * 1 otherwise |
| * |
| */ |
| static int row_dispatch_requests(struct request_queue *q, int force) |
| { |
| struct row_data *rd = (struct row_data *)q->elevator->elevator_data; |
| int ret = 0, currq, ioprio_class_to_serve, start_idx, end_idx; |
| |
| if (force && hrtimer_active(&rd->rd_idle_data.hr_timer)) { |
| if (hrtimer_try_to_cancel(&rd->rd_idle_data.hr_timer) >= 0) { |
| row_log(rd->dispatch_queue, |
| "Canceled delayed work on %d - forced dispatch", |
| rd->rd_idle_data.idling_queue_idx); |
| rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO; |
| } |
| } |
| |
| if (rd->pending_urgent_rq) { |
| row_log(rd->dispatch_queue, "dispatching urgent request"); |
| row_dispatch_insert(rd, rd->pending_urgent_rq); |
| ret = 1; |
| goto done; |
| } |
| |
| ioprio_class_to_serve = row_get_ioprio_class_to_serve(rd, force); |
| row_log(rd->dispatch_queue, "Dispatching from %d priority class", |
| ioprio_class_to_serve); |
| |
| switch (ioprio_class_to_serve) { |
| case IOPRIO_CLASS_NONE: |
| rd->last_served_ioprio_class = IOPRIO_CLASS_NONE; |
| goto done; |
| case IOPRIO_CLASS_RT: |
| start_idx = ROWQ_HIGH_PRIO_IDX; |
| end_idx = ROWQ_REG_PRIO_IDX; |
| break; |
| case IOPRIO_CLASS_BE: |
| start_idx = ROWQ_REG_PRIO_IDX; |
| end_idx = ROWQ_LOW_PRIO_IDX; |
| break; |
| case IOPRIO_CLASS_IDLE: |
| start_idx = ROWQ_LOW_PRIO_IDX; |
| end_idx = ROWQ_MAX_PRIO; |
| break; |
| default: |
| pr_err("%s(): Invalid I/O priority class", __func__); |
| goto done; |
| } |
| |
| currq = row_get_next_queue(q, rd, start_idx, end_idx); |
| |
| /* Dispatch */ |
| if (currq >= 0) { |
| row_dispatch_insert(rd, |
| rq_entry_fifo(rd->row_queues[currq].fifo.next)); |
| ret = 1; |
| } |
| done: |
| return ret; |
| } |
| |
| /* |
| * row_init_queue() - Init scheduler data structures |
| * @q: requests queue |
| * |
| * Return pointer to struct row_data to be saved in elevator for |
| * this dispatch queue |
| * |
| */ |
| static void *row_init_queue(struct request_queue *q) |
| { |
| |
| struct row_data *rdata; |
| int i; |
| |
| rdata = kmalloc_node(sizeof(*rdata), |
| GFP_KERNEL | __GFP_ZERO, q->node); |
| if (!rdata) |
| return NULL; |
| |
| memset(rdata, 0, sizeof(*rdata)); |
| for (i = 0; i < ROWQ_MAX_PRIO; i++) { |
| INIT_LIST_HEAD(&rdata->row_queues[i].fifo); |
| rdata->row_queues[i].disp_quantum = row_queues_def[i].quantum; |
| rdata->row_queues[i].rdata = rdata; |
| rdata->row_queues[i].prio = i; |
| rdata->row_queues[i].idle_data.begin_idling = false; |
| rdata->row_queues[i].idle_data.last_insert_time = |
| ktime_set(0, 0); |
| } |
| |
| rdata->reg_prio_starvation.starvation_limit = |
| ROW_REG_STARVATION_TOLLERANCE; |
| rdata->low_prio_starvation.starvation_limit = |
| ROW_LOW_STARVATION_TOLLERANCE; |
| /* |
| * Currently idling is enabled only for READ queues. If we want to |
| * enable it for write queues also, note that idling frequency will |
| * be the same in both cases |
| */ |
| rdata->rd_idle_data.idle_time_ms = ROW_IDLE_TIME_MSEC; |
| rdata->rd_idle_data.freq_ms = ROW_READ_FREQ_MSEC; |
| hrtimer_init(&rdata->rd_idle_data.hr_timer, |
| CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
| rdata->rd_idle_data.hr_timer.function = &row_idle_hrtimer_fn; |
| |
| INIT_WORK(&rdata->rd_idle_data.idle_work, kick_queue); |
| rdata->last_served_ioprio_class = IOPRIO_CLASS_NONE; |
| rdata->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO; |
| rdata->dispatch_queue = q; |
| |
| return rdata; |
| } |
| |
| /* |
| * row_exit_queue() - called on unloading the RAW scheduler |
| * @e: poiner to struct elevator_queue |
| * |
| */ |
| static void row_exit_queue(struct elevator_queue *e) |
| { |
| struct row_data *rd = (struct row_data *)e->elevator_data; |
| int i; |
| |
| for (i = 0; i < ROWQ_MAX_PRIO; i++) |
| BUG_ON(!list_empty(&rd->row_queues[i].fifo)); |
| if (hrtimer_cancel(&rd->rd_idle_data.hr_timer)) |
| pr_err("%s(): idle timer was active!", __func__); |
| rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO; |
| kfree(rd); |
| } |
| |
| /* |
| * row_merged_requests() - Called when 2 requests are merged |
| * @q: requests queue |
| * @rq: request the two requests were merged into |
| * @next: request that was merged |
| */ |
| static void row_merged_requests(struct request_queue *q, struct request *rq, |
| struct request *next) |
| { |
| struct row_queue *rqueue = RQ_ROWQ(next); |
| |
| list_del_init(&next->queuelist); |
| rqueue->nr_req--; |
| if (rqueue->rdata->pending_urgent_rq == next) { |
| pr_err("\n\nROW_WARNING: merging pending urgent!"); |
| rqueue->rdata->pending_urgent_rq = rq; |
| rq->cmd_flags |= REQ_URGENT; |
| WARN_ON(!(next->cmd_flags & REQ_URGENT)); |
| next->cmd_flags &= ~REQ_URGENT; |
| } |
| rqueue->rdata->nr_reqs[rq_data_dir(rq)]--; |
| } |
| |
| /* |
| * row_get_queue_prio() - Get queue priority for a given request |
| * |
| * This is a helping function which purpose is to determine what |
| * ROW queue the given request should be added to (and |
| * dispatched from later on) |
| * |
| */ |
| static enum row_queue_prio row_get_queue_prio(struct request *rq, |
| struct row_data *rd) |
| { |
| const int data_dir = rq_data_dir(rq); |
| const bool is_sync = rq_is_sync(rq); |
| enum row_queue_prio q_type = ROWQ_MAX_PRIO; |
| int ioprio_class = IOPRIO_PRIO_CLASS(rq->elv.icq->ioc->ioprio); |
| |
| switch (ioprio_class) { |
| case IOPRIO_CLASS_RT: |
| if (data_dir == READ) |
| q_type = ROWQ_PRIO_HIGH_READ; |
| else if (is_sync) |
| q_type = ROWQ_PRIO_HIGH_SWRITE; |
| else { |
| pr_err("%s:%s(): got a simple write from RT_CLASS. How???", |
| rq->rq_disk->disk_name, __func__); |
| q_type = ROWQ_PRIO_REG_WRITE; |
| } |
| break; |
| case IOPRIO_CLASS_IDLE: |
| if (data_dir == READ) |
| q_type = ROWQ_PRIO_LOW_READ; |
| else if (is_sync) |
| q_type = ROWQ_PRIO_LOW_SWRITE; |
| else { |
| pr_err("%s:%s(): got a simple write from IDLE_CLASS. How???", |
| rq->rq_disk->disk_name, __func__); |
| q_type = ROWQ_PRIO_REG_WRITE; |
| } |
| break; |
| case IOPRIO_CLASS_NONE: |
| case IOPRIO_CLASS_BE: |
| default: |
| if (data_dir == READ) |
| q_type = ROWQ_PRIO_REG_READ; |
| else if (is_sync) |
| q_type = ROWQ_PRIO_REG_SWRITE; |
| else |
| q_type = ROWQ_PRIO_REG_WRITE; |
| break; |
| } |
| |
| return q_type; |
| } |
| |
| /* |
| * row_set_request() - Set ROW data structures associated with this request. |
| * @q: requests queue |
| * @rq: pointer to the request |
| * @gfp_mask: ignored |
| * |
| */ |
| static int |
| row_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) |
| { |
| struct row_data *rd = (struct row_data *)q->elevator->elevator_data; |
| unsigned long flags; |
| |
| spin_lock_irqsave(q->queue_lock, flags); |
| rq->elv.priv[0] = |
| (void *)(&rd->row_queues[row_get_queue_prio(rq, rd)]); |
| spin_unlock_irqrestore(q->queue_lock, flags); |
| |
| return 0; |
| } |
| |
| /********** Helping sysfs functions/defenitions for ROW attributes ******/ |
| static ssize_t row_var_show(int var, char *page) |
| { |
| return snprintf(page, 100, "%d\n", var); |
| } |
| |
| static ssize_t row_var_store(int *var, const char *page, size_t count) |
| { |
| int err; |
| err = kstrtoul(page, 10, (unsigned long *)var); |
| |
| return count; |
| } |
| |
| #define SHOW_FUNCTION(__FUNC, __VAR) \ |
| static ssize_t __FUNC(struct elevator_queue *e, char *page) \ |
| { \ |
| struct row_data *rowd = e->elevator_data; \ |
| int __data = __VAR; \ |
| return row_var_show(__data, (page)); \ |
| } |
| SHOW_FUNCTION(row_hp_read_quantum_show, |
| rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum); |
| SHOW_FUNCTION(row_rp_read_quantum_show, |
| rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum); |
| SHOW_FUNCTION(row_hp_swrite_quantum_show, |
| rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum); |
| SHOW_FUNCTION(row_rp_swrite_quantum_show, |
| rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum); |
| SHOW_FUNCTION(row_rp_write_quantum_show, |
| rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum); |
| SHOW_FUNCTION(row_lp_read_quantum_show, |
| rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum); |
| SHOW_FUNCTION(row_lp_swrite_quantum_show, |
| rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum); |
| SHOW_FUNCTION(row_rd_idle_data_show, rowd->rd_idle_data.idle_time_ms); |
| SHOW_FUNCTION(row_rd_idle_data_freq_show, rowd->rd_idle_data.freq_ms); |
| SHOW_FUNCTION(row_reg_starv_limit_show, |
| rowd->reg_prio_starvation.starvation_limit); |
| SHOW_FUNCTION(row_low_starv_limit_show, |
| rowd->low_prio_starvation.starvation_limit); |
| #undef SHOW_FUNCTION |
| |
| #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ |
| static ssize_t __FUNC(struct elevator_queue *e, \ |
| const char *page, size_t count) \ |
| { \ |
| struct row_data *rowd = e->elevator_data; \ |
| int __data; \ |
| int ret = row_var_store(&__data, (page), count); \ |
| if (__data < (MIN)) \ |
| __data = (MIN); \ |
| else if (__data > (MAX)) \ |
| __data = (MAX); \ |
| *(__PTR) = __data; \ |
| return ret; \ |
| } |
| STORE_FUNCTION(row_hp_read_quantum_store, |
| &rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 1, INT_MAX); |
| STORE_FUNCTION(row_rp_read_quantum_store, |
| &rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum, |
| 1, INT_MAX); |
| STORE_FUNCTION(row_hp_swrite_quantum_store, |
| &rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum, |
| 1, INT_MAX); |
| STORE_FUNCTION(row_rp_swrite_quantum_store, |
| &rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum, |
| 1, INT_MAX); |
| STORE_FUNCTION(row_rp_write_quantum_store, |
| &rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum, |
| 1, INT_MAX); |
| STORE_FUNCTION(row_lp_read_quantum_store, |
| &rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum, |
| 1, INT_MAX); |
| STORE_FUNCTION(row_lp_swrite_quantum_store, |
| &rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum, |
| 1, INT_MAX); |
| STORE_FUNCTION(row_rd_idle_data_store, &rowd->rd_idle_data.idle_time_ms, |
| 1, INT_MAX); |
| STORE_FUNCTION(row_rd_idle_data_freq_store, &rowd->rd_idle_data.freq_ms, |
| 1, INT_MAX); |
| STORE_FUNCTION(row_reg_starv_limit_store, |
| &rowd->reg_prio_starvation.starvation_limit, |
| 1, INT_MAX); |
| STORE_FUNCTION(row_low_starv_limit_store, |
| &rowd->low_prio_starvation.starvation_limit, |
| 1, INT_MAX); |
| |
| #undef STORE_FUNCTION |
| |
/*
 * Build one sysfs attribute entry called @name, with the read and write
 * permission bits given below, wired to the row_<name>_show and
 * row_<name>_store handlers.
 */
#define ROW_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, row_##name##_show, \
				      row_##name##_store)

/* Tunables exported by the ROW scheduler; the list is NULL terminated */
static struct elv_fs_entry row_attrs[] = {
	ROW_ATTR(hp_read_quantum),
	ROW_ATTR(rp_read_quantum),
	ROW_ATTR(hp_swrite_quantum),
	ROW_ATTR(rp_swrite_quantum),
	ROW_ATTR(rp_write_quantum),
	ROW_ATTR(lp_read_quantum),
	ROW_ATTR(lp_swrite_quantum),
	ROW_ATTR(rd_idle_data),
	ROW_ATTR(rd_idle_data_freq),
	ROW_ATTR(reg_starv_limit),
	ROW_ATTR(low_starv_limit),
	__ATTR_NULL
};
| |
/*
 * Elevator descriptor of the ROW scheduler: the callbacks implemented in
 * this file, the sysfs attributes and the name the scheduler is
 * registered under ("row").
 */
static struct elevator_type iosched_row = {
	.ops = {
		.elevator_merge_req_fn		= row_merged_requests,
		.elevator_dispatch_fn		= row_dispatch_requests,
		.elevator_add_req_fn		= row_add_request,
		.elevator_reinsert_req_fn	= row_reinsert_req,
		.elevator_is_urgent_fn		= row_urgent_pending,
		.elevator_completed_req_fn	= row_completed_req,
		.elevator_former_req_fn		= elv_rb_former_request,
		.elevator_latter_req_fn		= elv_rb_latter_request,
		.elevator_set_req_fn		= row_set_request,
		.elevator_init_fn		= row_init_queue,
		.elevator_exit_fn		= row_exit_queue,
	},
	/* ROW keeps no private per-icq data: a plain io_cq is enough */
	.icq_size = sizeof(struct io_cq),
	.icq_align = __alignof__(struct io_cq),
	.elevator_attrs = row_attrs,
	.elevator_name = "row",
	.elevator_owner = THIS_MODULE,
};
| |
| static int __init row_init(void) |
| { |
| elv_register(&iosched_row); |
| return 0; |
| } |
| |
| static void __exit row_exit(void) |
| { |
| elv_unregister(&iosched_row); |
| } |
| |
| module_init(row_init); |
| module_exit(row_exit); |
| |
| MODULE_LICENSE("GPLv2"); |
| MODULE_DESCRIPTION("Read Over Write IO scheduler"); |