- /*
- * ROW (Read Over Write) I/O scheduler.
- *
- * Copyright (c) 2012-2014, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
- /* See Documentation/block/row-iosched.txt */
- #include <linux/kernel.h>
- #include <linux/fs.h>
- #include <linux/blkdev.h>
- #include <linux/elevator.h>
- #include <linux/bio.h>
- #include <linux/module.h>
- #include <linux/slab.h>
- #include <linux/init.h>
- #include <linux/compiler.h>
- #include <linux/blktrace_api.h>
- #include <linux/hrtimer.h>
- /*
- * enum row_queue_prio - Priorities of the ROW queues
- *
- * This enum defines the priorities (and the number of queues)
- * the requests will be distributed to. The higher the priority,
- * the bigger the "bus time" (or dispatch quantum) given to
- * that queue.
- * ROWQ_PRIO_HIGH_READ is the highest priority queue.
- *
- */
- enum row_queue_prio {
- ROWQ_PRIO_HIGH_READ = 0,
- ROWQ_PRIO_HIGH_SWRITE,
- ROWQ_PRIO_REG_READ,
- ROWQ_PRIO_REG_SWRITE,
- ROWQ_PRIO_REG_WRITE,
- ROWQ_PRIO_LOW_READ,
- ROWQ_PRIO_LOW_SWRITE,
- ROWQ_MAX_PRIO,
- };
- /*
- * The following indexes define the distribution of ROW queues according to
- * priorities. Each index defines the first queue in that priority group.
- */
- #define ROWQ_HIGH_PRIO_IDX ROWQ_PRIO_HIGH_READ
- #define ROWQ_REG_PRIO_IDX ROWQ_PRIO_REG_READ
- #define ROWQ_LOW_PRIO_IDX ROWQ_PRIO_LOW_READ
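- /*
- * Given the enum above, the groups span:
- * HIGH: ROWQ_PRIO_HIGH_READ, ROWQ_PRIO_HIGH_SWRITE
- * REG:  ROWQ_PRIO_REG_READ, ROWQ_PRIO_REG_SWRITE, ROWQ_PRIO_REG_WRITE
- * LOW:  ROWQ_PRIO_LOW_READ, ROWQ_PRIO_LOW_SWRITE
- * Each index marks the first queue of its class; the next class's index
- * (or ROWQ_MAX_PRIO) serves as the exclusive upper bound when iterating.
- */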
- /**
- * struct row_queue_params - ROW queue parameters
- * @idling_enabled: Flag indicating whether idling is enabled on
- * the queue
- * @quantum: Number of requests to be dispatched from this queue
- * in a dispatch cycle
- * @is_urgent: Flag indicating whether the queue can notify on
- * urgent requests
- *
- */
- struct row_queue_params {
- bool idling_enabled;
- int quantum;
- bool is_urgent;
- };
- /*
- * This array holds the default values of the different configurables
- * for each ROW queue. Each row of the array holds the following values:
- * {idling_enabled, quantum, is_urgent}
- * Each row corresponds to a queue with the same index (according to
- * enum row_queue_prio)
- * Note: The quanta are meaningful relative to the other queues in the
- * same priority class. For example:
- * For every 10 high priority read requests, 1 high priority sync
- * write will be dispatched.
- * For every 100 regular read requests, 1 regular write request will
- * be dispatched.
- */
- static const struct row_queue_params row_queues_def[] = {
- /* idling_enabled, quantum, is_urgent */
- {true, 10, true}, /* ROWQ_PRIO_HIGH_READ */
- {false, 1, false}, /* ROWQ_PRIO_HIGH_SWRITE */
- {true, 100, true}, /* ROWQ_PRIO_REG_READ */
- {false, 1, false}, /* ROWQ_PRIO_REG_SWRITE */
- {false, 1, false}, /* ROWQ_PRIO_REG_WRITE */
- {false, 1, false}, /* ROWQ_PRIO_LOW_READ */
- {false, 1, false} /* ROWQ_PRIO_LOW_SWRITE */
- };
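- /*
- * Illustration, derived from the defaults above: a full dispatch cycle
- * of the REGULAR class services up to 100 REG_READ + 1 REG_SWRITE +
- * 1 REG_WRITE requests, i.e. reads get roughly 98% of that class's
- * bandwidth; the HIGH class dispatches up to 10 reads per sync write.
- */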
- /* Default values for idling on read queues (in msec) */
- #define ROW_IDLE_TIME_MSEC 5
- #define ROW_READ_FREQ_MSEC 5
- /**
- * struct rowq_idling_data - parameters for idling on the queue
- * @last_insert_time: time the last request was inserted
- * to the queue
- * @begin_idling: flag indicating whether we should idle
- *
- */
- struct rowq_idling_data {
- ktime_t last_insert_time;
- bool begin_idling;
- };
- /**
- * struct row_queue - requests grouping structure
- * @rdata: parent row_data structure
- * @fifo: fifo of requests
- * @prio: queue priority (enum row_queue_prio)
- * @nr_dispatched: number of requests already dispatched in
- * the current dispatch cycle
- * @nr_req: number of requests in queue
- * @disp_quantum: number of requests this queue may
- * dispatch in a dispatch cycle
- * @idle_data: data for idling on queues
- *
- */
- struct row_queue {
- struct row_data *rdata;
- struct list_head fifo;
- enum row_queue_prio prio;
- unsigned int nr_dispatched;
- unsigned int nr_req;
- int disp_quantum;
- /* used only for READ queues */
- struct rowq_idling_data idle_data;
- };
- /**
- * struct idling_data - data for idling on empty rqueue
- * @idle_time_ms: idling duration (msec)
- * @freq_ms: min time between two requests that
- * trigger idling (msec)
- * @hr_timer: idling timer
- * @idle_work: the work to be scheduled when idling timer expires
- * @idling_queue_idx: index of the queue we're idling on
- *
- */
- struct idling_data {
- s64 idle_time_ms;
- s64 freq_ms;
- struct hrtimer hr_timer;
- struct work_struct idle_work;
- enum row_queue_prio idling_queue_idx;
- };
- /**
- * struct starvation_data - data for starvation management
- * @starvation_limit: maximum number of higher priority
- * dispatches this priority class tolerates while starved
- * @starvation_counter: number of requests from higher
- * priority classes that were dispatched while requests
- * of this priority class were pending
- *
- */
- struct starvation_data {
- int starvation_limit;
- int starvation_counter;
- };
- /**
- * struct row_data - Per block device rqueue structure
- * @dispatch_queue: dispatch rqueue
- * @row_queues: array of priority request queues
- * @rd_idle_data: data for idling after READ request
- * @nr_reqs: nr_reqs[0] holds the number of all READ requests in
- * scheduler, nr_reqs[1] holds the number of all WRITE
- * requests in scheduler
- * @urgent_in_flight: flag indicating that there is an urgent
- * request that was dispatched to driver and is yet to
- * complete.
- * @pending_urgent_rq: pointer to the pending urgent request
- * @last_served_ioprio_class: I/O priority class that was last dispatched from
- * @reg_prio_starvation: starvation data for REGULAR priority queues
- * @low_prio_starvation: starvation data for LOW priority queues
- * @cycle_flags: used for marking unserved queues
- *
- */
- struct row_data {
- struct request_queue *dispatch_queue;
- struct row_queue row_queues[ROWQ_MAX_PRIO];
- struct idling_data rd_idle_data;
- unsigned int nr_reqs[2];
- bool urgent_in_flight;
- struct request *pending_urgent_rq;
- int last_served_ioprio_class;
- #define ROW_REG_STARVATION_TOLLERANCE 5000
- struct starvation_data reg_prio_starvation;
- #define ROW_LOW_STARVATION_TOLLERANCE 10000
- struct starvation_data low_prio_starvation;
- unsigned int cycle_flags;
- };
- #define RQ_ROWQ(rq) ((struct row_queue *) ((rq)->elv.priv[0]))
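- /*
- * rq->elv.priv[0] is populated with the request's row_queue pointer by
- * row_set_request() below; RQ_ROWQ() is the matching accessor used on
- * the insert, dispatch and merge paths.
- */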
- #define row_log(q, fmt, args...) \
- blk_add_trace_msg(q, "%s():" fmt , __func__, ##args)
- #define row_log_rowq(rdata, rowq_id, fmt, args...) \
- blk_add_trace_msg(rdata->dispatch_queue, "rowq%d " fmt, \
- rowq_id, ##args)
- static inline void row_mark_rowq_unserved(struct row_data *rd,
- enum row_queue_prio qnum)
- {
- rd->cycle_flags |= (1 << qnum);
- }
- static inline void row_clear_rowq_unserved(struct row_data *rd,
- enum row_queue_prio qnum)
- {
- rd->cycle_flags &= ~(1 << qnum);
- }
- static inline int row_rowq_unserved(struct row_data *rd,
- enum row_queue_prio qnum)
- {
- return rd->cycle_flags & (1 << qnum);
- }
- static inline void __maybe_unused row_dump_queues_stat(struct row_data *rd)
- {
- int i;
- row_log(rd->dispatch_queue, " Queues status:");
- for (i = 0; i < ROWQ_MAX_PRIO; i++)
- row_log(rd->dispatch_queue,
- "queue%d: dispatched= %d, nr_req=%d", i,
- rd->row_queues[i].nr_dispatched,
- rd->row_queues[i].nr_req);
- }
- /******************** Static helper functions ***********************/
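- /*
- * Idling flow (as wired below): row_get_ioprio_class_to_serve() arms
- * hr_timer instead of dispatching; on expiry row_idle_hrtimer_fn()
- * marks idling as done and, if requests are still pending, schedules
- * idle_work; its handler kick_queue() then calls blk_run_queue() from
- * kblockd process context rather than from hrtimer context.
- */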
- static void kick_queue(struct work_struct *work)
- {
- struct idling_data *read_data =
- container_of(work, struct idling_data, idle_work);
- struct row_data *rd =
- container_of(read_data, struct row_data, rd_idle_data);
- blk_run_queue(rd->dispatch_queue);
- }
- static enum hrtimer_restart row_idle_hrtimer_fn(struct hrtimer *hr_timer)
- {
- struct idling_data *read_data =
- container_of(hr_timer, struct idling_data, hr_timer);
- struct row_data *rd =
- container_of(read_data, struct row_data, rd_idle_data);
- row_log_rowq(rd, rd->rd_idle_data.idling_queue_idx,
- "Performing delayed work");
- /* Mark idling process as done */
- rd->row_queues[rd->rd_idle_data.idling_queue_idx].
- idle_data.begin_idling = false;
- rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
- if (!rd->nr_reqs[READ] && !rd->nr_reqs[WRITE])
- row_log(rd->dispatch_queue, "No requests in scheduler");
- else
- kblockd_schedule_work(rd->dispatch_queue,
- &read_data->idle_work);
- return HRTIMER_NORESTART;
- }
- /*
- * row_regular_req_pending() - Check if there are REGULAR priority requests
- * pending in the scheduler
- * @rd: pointer to struct row_data
- *
- * Returns true if there are REGULAR priority requests in the scheduler
- * queues, false otherwise.
- */
- static inline bool row_regular_req_pending(struct row_data *rd)
- {
- int i;
- for (i = ROWQ_REG_PRIO_IDX; i < ROWQ_LOW_PRIO_IDX; i++)
- if (!list_empty(&rd->row_queues[i].fifo))
- return true;
- return false;
- }
- /*
- * row_low_req_pending() - Check if there are LOW priority requests
- * pending in the scheduler
- * @rd: pointer to struct row_data
- *
- * Returns true if there are LOW priority requests in the scheduler
- * queues, false otherwise.
- */
- static inline bool row_low_req_pending(struct row_data *rd)
- {
- int i;
- for (i = ROWQ_LOW_PRIO_IDX; i < ROWQ_MAX_PRIO; i++)
- if (!list_empty(&rd->row_queues[i].fifo))
- return true;
- return false;
- }
- /******************* Elevator callback functions *********************/
- /*
- * row_add_request() - Add request to the scheduler
- * @q: requests queue
- * @rq: request to add
- *
- */
- static void row_add_request(struct request_queue *q,
- struct request *rq)
- {
- struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
- struct row_queue *rqueue = RQ_ROWQ(rq);
- s64 diff_ms;
- bool queue_was_empty = list_empty(&rqueue->fifo);
- list_add_tail(&rq->queuelist, &rqueue->fifo);
- rd->nr_reqs[rq_data_dir(rq)]++;
- rqueue->nr_req++;
- rq_set_fifo_time(rq, jiffies); /* for statistics */
- if (rq->cmd_flags & REQ_URGENT) {
- WARN_ON(1);
- blk_dump_rq_flags(rq, "");
- rq->cmd_flags &= ~REQ_URGENT;
- }
- if (row_queues_def[rqueue->prio].idling_enabled) {
- if (rd->rd_idle_data.idling_queue_idx == rqueue->prio &&
- hrtimer_active(&rd->rd_idle_data.hr_timer)) {
- if (hrtimer_try_to_cancel(
- &rd->rd_idle_data.hr_timer) >= 0) {
- row_log_rowq(rd, rqueue->prio,
- "Canceled delayed work on %d",
- rd->rd_idle_data.idling_queue_idx);
- rd->rd_idle_data.idling_queue_idx =
- ROWQ_MAX_PRIO;
- }
- }
- diff_ms = ktime_to_ms(ktime_sub(ktime_get(),
- rqueue->idle_data.last_insert_time));
- if (unlikely(diff_ms < 0)) {
- pr_err("%s(): time delta error: diff_ms < 0",
- __func__);
- rqueue->idle_data.begin_idling = false;
- return;
- }
- if (diff_ms < rd->rd_idle_data.freq_ms) {
- rqueue->idle_data.begin_idling = true;
- row_log_rowq(rd, rqueue->prio, "Enable idling");
- } else {
- rqueue->idle_data.begin_idling = false;
- row_log_rowq(rd, rqueue->prio, "Disable idling (%ldms)",
- (long)diff_ms);
- }
- rqueue->idle_data.last_insert_time = ktime_get();
- }
- if (row_queues_def[rqueue->prio].is_urgent &&
- !rd->pending_urgent_rq && !rd->urgent_in_flight) {
- /* Handle High Priority queues */
- if (rqueue->prio < ROWQ_REG_PRIO_IDX &&
- rd->last_served_ioprio_class != IOPRIO_CLASS_RT &&
- queue_was_empty) {
- row_log_rowq(rd, rqueue->prio,
- "added (high prio) urgent request");
- rq->cmd_flags |= REQ_URGENT;
- rd->pending_urgent_rq = rq;
- } else if (row_rowq_unserved(rd, rqueue->prio)) {
- /* Handle Regular priority queues */
- row_log_rowq(rd, rqueue->prio,
- "added urgent request (total on queue=%d)",
- rqueue->nr_req);
- rq->cmd_flags |= REQ_URGENT;
- rd->pending_urgent_rq = rq;
- }
- } else
- row_log_rowq(rd, rqueue->prio,
- "added request (total on queue=%d)", rqueue->nr_req);
- }
- /**
- * row_reinsert_req() - Reinsert request back to the scheduler
- * @q: requests queue
- * @rq: request to reinsert
- *
- * Reinsert the given request back to the queue it was
- * dispatched from as if it was never dispatched.
- *
- * Returns 0 on success, error code otherwise
- */
- static int row_reinsert_req(struct request_queue *q,
- struct request *rq)
- {
- struct row_data *rd = q->elevator->elevator_data;
- struct row_queue *rqueue = RQ_ROWQ(rq);
- if (!rqueue || rqueue->prio >= ROWQ_MAX_PRIO)
- return -EIO;
- list_add(&rq->queuelist, &rqueue->fifo);
- rd->nr_reqs[rq_data_dir(rq)]++;
- rqueue->nr_req++;
- row_log_rowq(rd, rqueue->prio,
- "%s request reinserted (total on queue=%d)",
- (rq_data_dir(rq) == READ ? "READ" : "WRITE"), rqueue->nr_req);
- if (rq->cmd_flags & REQ_URGENT) {
- /*
- * It's not compliant with the design to re-insert
- * urgent requests. We want to be able to track this
- * down.
- */
- WARN_ON(1);
- if (!rd->urgent_in_flight) {
- pr_err("%s(): no urgent in flight", __func__);
- } else {
- rd->urgent_in_flight = false;
- pr_err("%s(): reinserting URGENT %s req",
- __func__,
- (rq_data_dir(rq) == READ ? "READ" : "WRITE"));
- if (rd->pending_urgent_rq) {
- pr_err("%s(): urgent rq is pending",
- __func__);
- rd->pending_urgent_rq->cmd_flags &= ~REQ_URGENT;
- }
- rd->pending_urgent_rq = rq;
- }
- }
- return 0;
- }
- static void row_completed_req(struct request_queue *q, struct request *rq)
- {
- struct row_data *rd = q->elevator->elevator_data;
- /* Remember urgency before the flag is cleared below */
- bool was_urgent = rq->cmd_flags & REQ_URGENT;
- if (was_urgent) {
- if (!rd->urgent_in_flight) {
- WARN_ON(1);
- pr_err("%s(): URGENT req but urgent_in_flight = F",
- __func__);
- }
- rd->urgent_in_flight = false;
- rq->cmd_flags &= ~REQ_URGENT;
- }
- row_log(q, "completed %s %s req.",
- (was_urgent ? "URGENT" : "regular"),
- (rq_data_dir(rq) == READ ? "READ" : "WRITE"));
- }
- /**
- * row_urgent_pending() - Return TRUE if there is an urgent
- * request in the scheduler
- * @q: requests queue
- */
- static bool row_urgent_pending(struct request_queue *q)
- {
- struct row_data *rd = q->elevator->elevator_data;
- if (rd->urgent_in_flight) {
- row_log(rd->dispatch_queue, "urgent request in flight");
- return false;
- }
- if (rd->pending_urgent_rq) {
- row_log(rd->dispatch_queue, "Urgent request pending");
- return true;
- }
- row_log(rd->dispatch_queue, "no urgent request pending/in flight");
- return false;
- }
- /**
- * row_remove_request() - Remove given request from scheduler
- * @rd: pointer to struct row_data
- * @rq: request to remove
- *
- */
- static void row_remove_request(struct row_data *rd,
- struct request *rq)
- {
- struct row_queue *rqueue = RQ_ROWQ(rq);
- list_del_init(&(rq)->queuelist);
- if (rd->pending_urgent_rq == rq)
- rd->pending_urgent_rq = NULL;
- else
- BUG_ON(rq->cmd_flags & REQ_URGENT);
- rqueue->nr_req--;
- rd->nr_reqs[rq_data_dir(rq)]--;
- }
- /*
- * row_dispatch_insert() - move request to dispatch queue
- * @rd: pointer to struct row_data
- * @rq: the request to dispatch
- *
- * This function moves the given request to the dispatch queue
- *
- */
- static void row_dispatch_insert(struct row_data *rd, struct request *rq)
- {
- struct row_queue *rqueue = RQ_ROWQ(rq);
- row_remove_request(rd, rq);
- elv_dispatch_sort(rd->dispatch_queue, rq);
- if (rq->cmd_flags & REQ_URGENT) {
- WARN_ON(rd->urgent_in_flight);
- rd->urgent_in_flight = true;
- }
- rqueue->nr_dispatched++;
- row_clear_rowq_unserved(rd, rqueue->prio);
- row_log_rowq(rd, rqueue->prio,
- " Dispatched request %p nr_disp = %d", rq,
- rqueue->nr_dispatched);
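- /*
- * Starvation accounting: a dispatch from a given class increments the
- * counter of every lower class that has requests pending, while a
- * dispatch from the REGULAR or LOW class resets that class's own
- * counter. Once a counter reaches its limit,
- * row_get_ioprio_class_to_serve() forces service of the starved class.
- */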
- if (rqueue->prio < ROWQ_REG_PRIO_IDX) {
- rd->last_served_ioprio_class = IOPRIO_CLASS_RT;
- if (row_regular_req_pending(rd))
- rd->reg_prio_starvation.starvation_counter++;
- if (row_low_req_pending(rd))
- rd->low_prio_starvation.starvation_counter++;
- } else if (rqueue->prio < ROWQ_LOW_PRIO_IDX) {
- rd->last_served_ioprio_class = IOPRIO_CLASS_BE;
- rd->reg_prio_starvation.starvation_counter = 0;
- if (row_low_req_pending(rd))
- rd->low_prio_starvation.starvation_counter++;
- } else {
- rd->last_served_ioprio_class = IOPRIO_CLASS_IDLE;
- rd->low_prio_starvation.starvation_counter = 0;
- }
- }
- /*
- * row_get_ioprio_class_to_serve() - Return the next I/O priority
- * class to dispatch requests from
- * @rd: pointer to struct row_data
- * @force: flag indicating if forced dispatch
- *
- * This function returns the next I/O priority class to serve
- * {IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE}.
- * If there are no more requests in scheduler or if we're idling on some queue
- * IOPRIO_CLASS_NONE will be returned.
- * If idling is scheduled on a lower priority queue than the one that needs
- * to be served, it will be canceled.
- *
- */
- static int row_get_ioprio_class_to_serve(struct row_data *rd, int force)
- {
- int i;
- int ret = IOPRIO_CLASS_NONE;
- if (!rd->nr_reqs[READ] && !rd->nr_reqs[WRITE]) {
- row_log(rd->dispatch_queue, "No more requests in scheduler");
- goto check_idling;
- }
- /* First, go over the high priority queues */
- for (i = 0; i < ROWQ_REG_PRIO_IDX; i++) {
- if (!list_empty(&rd->row_queues[i].fifo)) {
- if (hrtimer_active(&rd->rd_idle_data.hr_timer)) {
- if (hrtimer_try_to_cancel(
- &rd->rd_idle_data.hr_timer) >= 0) {
- row_log(rd->dispatch_queue,
- "Canceling delayed work on %d. RT pending",
- rd->rd_idle_data.idling_queue_idx);
- rd->rd_idle_data.idling_queue_idx =
- ROWQ_MAX_PRIO;
- }
- }
- if (row_regular_req_pending(rd) &&
- (rd->reg_prio_starvation.starvation_counter >=
- rd->reg_prio_starvation.starvation_limit))
- ret = IOPRIO_CLASS_BE;
- else if (row_low_req_pending(rd) &&
- (rd->low_prio_starvation.starvation_counter >=
- rd->low_prio_starvation.starvation_limit))
- ret = IOPRIO_CLASS_IDLE;
- else
- ret = IOPRIO_CLASS_RT;
- goto done;
- }
- }
- /*
- * At the moment idling is implemented only for READ queues.
- * If enabled on WRITE, this needs updating
- */
- if (hrtimer_active(&rd->rd_idle_data.hr_timer)) {
- row_log(rd->dispatch_queue, "Delayed work pending. Exiting");
- goto done;
- }
- check_idling:
- /* Check for (high priority) idling and enable if needed */
- for (i = 0; i < ROWQ_REG_PRIO_IDX && !force; i++) {
- if (rd->row_queues[i].idle_data.begin_idling &&
- row_queues_def[i].idling_enabled)
- goto initiate_idling;
- }
- /* Regular priority queues */
- for (i = ROWQ_REG_PRIO_IDX; i < ROWQ_LOW_PRIO_IDX; i++) {
- if (list_empty(&rd->row_queues[i].fifo)) {
- /* We can idle only if this is not a forced dispatch */
- if (rd->row_queues[i].idle_data.begin_idling &&
- !force && row_queues_def[i].idling_enabled)
- goto initiate_idling;
- } else {
- if (row_low_req_pending(rd) &&
- (rd->low_prio_starvation.starvation_counter >=
- rd->low_prio_starvation.starvation_limit))
- ret = IOPRIO_CLASS_IDLE;
- else
- ret = IOPRIO_CLASS_BE;
- goto done;
- }
- }
- if (rd->nr_reqs[READ] || rd->nr_reqs[WRITE])
- ret = IOPRIO_CLASS_IDLE;
- goto done;
- initiate_idling:
- hrtimer_start(&rd->rd_idle_data.hr_timer,
- ktime_set(0, rd->rd_idle_data.idle_time_ms * NSEC_PER_MSEC),
- HRTIMER_MODE_REL);
- rd->rd_idle_data.idling_queue_idx = i;
- row_log_rowq(rd, i, "Scheduled delayed work on %d. exiting", i);
- done:
- return ret;
- }
- static void row_restart_cycle(struct row_data *rd,
- int start_idx, int end_idx)
- {
- int i;
- row_dump_queues_stat(rd);
- for (i = start_idx; i < end_idx; i++) {
- if (rd->row_queues[i].nr_dispatched <
- rd->row_queues[i].disp_quantum)
- row_mark_rowq_unserved(rd, i);
- rd->row_queues[i].nr_dispatched = 0;
- }
- row_log(rd->dispatch_queue, "Restarting cycle for class @ %d-%d",
- start_idx, end_idx);
- }
- /*
- * row_get_next_queue() - selects the next queue to dispatch from
- * @q: requests queue
- * @rd: pointer to struct row_data
- * @start_idx/end_idx: indexes in the row_queues array to select a queue
- * from.
- *
- * Returns the index of the queue to dispatch from, or an error code on failure.
- *
- */
- static int row_get_next_queue(struct request_queue *q, struct row_data *rd,
- int start_idx, int end_idx)
- {
- int i = start_idx;
- bool restart = true;
- int ret = -EIO;
- do {
- if (list_empty(&rd->row_queues[i].fifo) ||
- rd->row_queues[i].nr_dispatched >=
- rd->row_queues[i].disp_quantum) {
- i++;
- if (i == end_idx && restart) {
- /* Restart cycle for this priority class */
- row_restart_cycle(rd, start_idx, end_idx);
- i = start_idx;
- restart = false;
- }
- } else {
- ret = i;
- break;
- }
- } while (i < end_idx);
- return ret;
- }
- /*
- * row_dispatch_requests() - selects the next request to dispatch
- * @q: requests queue
- * @force: flag indicating if forced dispatch
- *
- * Return 0 if no requests were moved to the dispatch queue.
- * 1 otherwise
- *
- */
- static int row_dispatch_requests(struct request_queue *q, int force)
- {
- struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
- int ret = 0, currq, ioprio_class_to_serve, start_idx, end_idx;
- if (force && hrtimer_active(&rd->rd_idle_data.hr_timer)) {
- if (hrtimer_try_to_cancel(&rd->rd_idle_data.hr_timer) >= 0) {
- row_log(rd->dispatch_queue,
- "Canceled delayed work on %d - forced dispatch",
- rd->rd_idle_data.idling_queue_idx);
- rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
- }
- }
- if (rd->pending_urgent_rq) {
- row_log(rd->dispatch_queue, "dispatching urgent request");
- row_dispatch_insert(rd, rd->pending_urgent_rq);
- ret = 1;
- goto done;
- }
- ioprio_class_to_serve = row_get_ioprio_class_to_serve(rd, force);
- row_log(rd->dispatch_queue, "Dispatching from %d priority class",
- ioprio_class_to_serve);
- switch (ioprio_class_to_serve) {
- case IOPRIO_CLASS_NONE:
- rd->last_served_ioprio_class = IOPRIO_CLASS_NONE;
- goto done;
- case IOPRIO_CLASS_RT:
- start_idx = ROWQ_HIGH_PRIO_IDX;
- end_idx = ROWQ_REG_PRIO_IDX;
- break;
- case IOPRIO_CLASS_BE:
- start_idx = ROWQ_REG_PRIO_IDX;
- end_idx = ROWQ_LOW_PRIO_IDX;
- break;
- case IOPRIO_CLASS_IDLE:
- start_idx = ROWQ_LOW_PRIO_IDX;
- end_idx = ROWQ_MAX_PRIO;
- break;
- default:
- pr_err("%s(): Invalid I/O priority class", __func__);
- goto done;
- }
- currq = row_get_next_queue(q, rd, start_idx, end_idx);
- /* Dispatch */
- if (currq >= 0) {
- row_dispatch_insert(rd,
- rq_entry_fifo(rd->row_queues[currq].fifo.next));
- ret = 1;
- }
- done:
- return ret;
- }
- /*
- * row_init_queue() - Init scheduler data structures
- * @q: requests queue
- *
- * Return pointer to struct row_data to be saved in elevator for
- * this dispatch queue
- *
- */
- static void *row_init_queue(struct request_queue *q)
- {
- struct row_data *rdata;
- int i;
- rdata = kmalloc_node(sizeof(*rdata),
- GFP_KERNEL | __GFP_ZERO, q->node);
- if (!rdata)
- return NULL;
- /* __GFP_ZERO already zeroed the allocation; no memset() needed */
- for (i = 0; i < ROWQ_MAX_PRIO; i++) {
- INIT_LIST_HEAD(&rdata->row_queues[i].fifo);
- rdata->row_queues[i].disp_quantum = row_queues_def[i].quantum;
- rdata->row_queues[i].rdata = rdata;
- rdata->row_queues[i].prio = i;
- rdata->row_queues[i].idle_data.begin_idling = false;
- rdata->row_queues[i].idle_data.last_insert_time =
- ktime_set(0, 0);
- }
- rdata->reg_prio_starvation.starvation_limit =
- ROW_REG_STARVATION_TOLLERANCE;
- rdata->low_prio_starvation.starvation_limit =
- ROW_LOW_STARVATION_TOLLERANCE;
- /*
- * Currently idling is enabled only for READ queues. If we want to
- * enable it for write queues also, note that idling frequency will
- * be the same in both cases
- */
- rdata->rd_idle_data.idle_time_ms = ROW_IDLE_TIME_MSEC;
- rdata->rd_idle_data.freq_ms = ROW_READ_FREQ_MSEC;
- hrtimer_init(&rdata->rd_idle_data.hr_timer,
- CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- rdata->rd_idle_data.hr_timer.function = &row_idle_hrtimer_fn;
- INIT_WORK(&rdata->rd_idle_data.idle_work, kick_queue);
- rdata->last_served_ioprio_class = IOPRIO_CLASS_NONE;
- rdata->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
- rdata->dispatch_queue = q;
- return rdata;
- }
- /*
- * row_exit_queue() - called on unloading the ROW scheduler
- * @e: pointer to struct elevator_queue
- *
- */
- static void row_exit_queue(struct elevator_queue *e)
- {
- struct row_data *rd = (struct row_data *)e->elevator_data;
- int i;
- for (i = 0; i < ROWQ_MAX_PRIO; i++)
- BUG_ON(!list_empty(&rd->row_queues[i].fifo));
- if (hrtimer_cancel(&rd->rd_idle_data.hr_timer))
- pr_err("%s(): idle timer was active!", __func__);
- rd->rd_idle_data.idling_queue_idx = ROWQ_MAX_PRIO;
- kfree(rd);
- }
- /*
- * row_merged_requests() - Called when 2 requests are merged
- * @q: requests queue
- * @rq: request the two requests were merged into
- * @next: request that was merged
- */
- static void row_merged_requests(struct request_queue *q, struct request *rq,
- struct request *next)
- {
- struct row_queue *rqueue = RQ_ROWQ(next);
- list_del_init(&next->queuelist);
- rqueue->nr_req--;
- if (rqueue->rdata->pending_urgent_rq == next) {
- pr_err("\n\nROW_WARNING: merging pending urgent!");
- rqueue->rdata->pending_urgent_rq = rq;
- rq->cmd_flags |= REQ_URGENT;
- WARN_ON(!(next->cmd_flags & REQ_URGENT));
- next->cmd_flags &= ~REQ_URGENT;
- }
- rqueue->rdata->nr_reqs[rq_data_dir(rq)]--;
- }
- /*
- * row_get_queue_prio() - Get queue priority for a given request
- *
- * This is a helper function whose purpose is to determine which
- * ROW queue the given request should be added to (and
- * dispatched from later on)
- *
- */
- static enum row_queue_prio row_get_queue_prio(struct request *rq,
- struct row_data *rd)
- {
- const int data_dir = rq_data_dir(rq);
- const bool is_sync = rq_is_sync(rq);
- enum row_queue_prio q_type = ROWQ_MAX_PRIO;
- int ioprio_class = IOPRIO_PRIO_CLASS(rq->elv.icq->ioc->ioprio);
- switch (ioprio_class) {
- case IOPRIO_CLASS_RT:
- if (data_dir == READ)
- q_type = ROWQ_PRIO_HIGH_READ;
- else if (is_sync)
- q_type = ROWQ_PRIO_HIGH_SWRITE;
- else {
- pr_err("%s:%s(): got a simple write from RT_CLASS. How???",
- rq->rq_disk->disk_name, __func__);
- q_type = ROWQ_PRIO_REG_WRITE;
- }
- break;
- case IOPRIO_CLASS_IDLE:
- if (data_dir == READ)
- q_type = ROWQ_PRIO_LOW_READ;
- else if (is_sync)
- q_type = ROWQ_PRIO_LOW_SWRITE;
- else {
- pr_err("%s:%s(): got a simple write from IDLE_CLASS. How???",
- rq->rq_disk->disk_name, __func__);
- q_type = ROWQ_PRIO_REG_WRITE;
- }
- break;
- case IOPRIO_CLASS_NONE:
- case IOPRIO_CLASS_BE:
- default:
- if (data_dir == READ)
- q_type = ROWQ_PRIO_REG_READ;
- else if (is_sync)
- q_type = ROWQ_PRIO_REG_SWRITE;
- else
- q_type = ROWQ_PRIO_REG_WRITE;
- break;
- }
- return q_type;
- }
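- /*
- * Example (illustrative, assuming standard ionice semantics): a READ
- * issued under "ionice -c 1" (IOPRIO_CLASS_RT) is routed to
- * ROWQ_PRIO_HIGH_READ; an async write from the same task is demoted to
- * ROWQ_PRIO_REG_WRITE with an error logged, since the RT class is not
- * expected to issue plain writes.
- */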
- /*
- * row_set_request() - Set ROW data structures associated with this request.
- * @q: requests queue
- * @rq: pointer to the request
- * @gfp_mask: ignored
- *
- */
- static int
- row_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
- {
- struct row_data *rd = (struct row_data *)q->elevator->elevator_data;
- unsigned long flags;
- spin_lock_irqsave(q->queue_lock, flags);
- rq->elv.priv[0] =
- (void *)(&rd->row_queues[row_get_queue_prio(rq, rd)]);
- spin_unlock_irqrestore(q->queue_lock, flags);
- return 0;
- }
- /********** Helper sysfs functions/definitions for ROW attributes ******/
- static ssize_t row_var_show(int var, char *page)
- {
- return snprintf(page, 100, "%d\n", var);
- }
- static ssize_t row_var_store(int *var, const char *page, size_t count)
- {
- int err;
- /* Parse into the int directly; reject malformed input */
- err = kstrtoint(page, 10, var);
- if (err)
- return err;
- return count;
- }
- #define SHOW_FUNCTION(__FUNC, __VAR) \
- static ssize_t __FUNC(struct elevator_queue *e, char *page) \
- { \
- struct row_data *rowd = e->elevator_data; \
- int __data = __VAR; \
- return row_var_show(__data, (page)); \
- }
- SHOW_FUNCTION(row_hp_read_quantum_show,
- rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum);
- SHOW_FUNCTION(row_rp_read_quantum_show,
- rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum);
- SHOW_FUNCTION(row_hp_swrite_quantum_show,
- rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum);
- SHOW_FUNCTION(row_rp_swrite_quantum_show,
- rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum);
- SHOW_FUNCTION(row_rp_write_quantum_show,
- rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum);
- SHOW_FUNCTION(row_lp_read_quantum_show,
- rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum);
- SHOW_FUNCTION(row_lp_swrite_quantum_show,
- rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum);
- SHOW_FUNCTION(row_rd_idle_data_show, rowd->rd_idle_data.idle_time_ms);
- SHOW_FUNCTION(row_rd_idle_data_freq_show, rowd->rd_idle_data.freq_ms);
- SHOW_FUNCTION(row_reg_starv_limit_show,
- rowd->reg_prio_starvation.starvation_limit);
- SHOW_FUNCTION(row_low_starv_limit_show,
- rowd->low_prio_starvation.starvation_limit);
- #undef SHOW_FUNCTION
- #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
- static ssize_t __FUNC(struct elevator_queue *e, \
- const char *page, size_t count) \
- { \
- struct row_data *rowd = e->elevator_data; \
- int __data; \
- int ret = row_var_store(&__data, (page), count); \
- if (ret < 0) \
- return ret; \
- if (__data < (MIN)) \
- __data = (MIN); \
- else if (__data > (MAX)) \
- __data = (MAX); \
- *(__PTR) = __data; \
- return ret; \
- }
- STORE_FUNCTION(row_hp_read_quantum_store,
- &rowd->row_queues[ROWQ_PRIO_HIGH_READ].disp_quantum, 1, INT_MAX);
- STORE_FUNCTION(row_rp_read_quantum_store,
- &rowd->row_queues[ROWQ_PRIO_REG_READ].disp_quantum,
- 1, INT_MAX);
- STORE_FUNCTION(row_hp_swrite_quantum_store,
- &rowd->row_queues[ROWQ_PRIO_HIGH_SWRITE].disp_quantum,
- 1, INT_MAX);
- STORE_FUNCTION(row_rp_swrite_quantum_store,
- &rowd->row_queues[ROWQ_PRIO_REG_SWRITE].disp_quantum,
- 1, INT_MAX);
- STORE_FUNCTION(row_rp_write_quantum_store,
- &rowd->row_queues[ROWQ_PRIO_REG_WRITE].disp_quantum,
- 1, INT_MAX);
- STORE_FUNCTION(row_lp_read_quantum_store,
- &rowd->row_queues[ROWQ_PRIO_LOW_READ].disp_quantum,
- 1, INT_MAX);
- STORE_FUNCTION(row_lp_swrite_quantum_store,
- &rowd->row_queues[ROWQ_PRIO_LOW_SWRITE].disp_quantum,
- 1, INT_MAX);
- STORE_FUNCTION(row_rd_idle_data_store, &rowd->rd_idle_data.idle_time_ms,
- 1, INT_MAX);
- STORE_FUNCTION(row_rd_idle_data_freq_store, &rowd->rd_idle_data.freq_ms,
- 1, INT_MAX);
- STORE_FUNCTION(row_reg_starv_limit_store,
- &rowd->reg_prio_starvation.starvation_limit,
- 1, INT_MAX);
- STORE_FUNCTION(row_low_starv_limit_store,
- &rowd->low_prio_starvation.starvation_limit,
- 1, INT_MAX);
- #undef STORE_FUNCTION
- #define ROW_ATTR(name) \
- __ATTR(name, S_IRUGO|S_IWUSR, row_##name##_show, \
- row_##name##_store)
- static struct elv_fs_entry row_attrs[] = {
- ROW_ATTR(hp_read_quantum),
- ROW_ATTR(rp_read_quantum),
- ROW_ATTR(hp_swrite_quantum),
- ROW_ATTR(rp_swrite_quantum),
- ROW_ATTR(rp_write_quantum),
- ROW_ATTR(lp_read_quantum),
- ROW_ATTR(lp_swrite_quantum),
- ROW_ATTR(rd_idle_data),
- ROW_ATTR(rd_idle_data_freq),
- ROW_ATTR(reg_starv_limit),
- ROW_ATTR(low_starv_limit),
- __ATTR_NULL
- };
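- /*
- * These attributes surface under the legacy elevator sysfs directory,
- * conventionally /sys/block/<disk>/queue/iosched/ (device name below is
- * only an example). A hypothetical tuning session:
- *
- *   echo row > /sys/block/sda/queue/scheduler
- *   echo 200 > /sys/block/sda/queue/iosched/rp_read_quantum
- *   cat /sys/block/sda/queue/iosched/rd_idle_data
- *
- * Stored values are clamped to [1, INT_MAX] by STORE_FUNCTION above.
- */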
- static struct elevator_type iosched_row = {
- .ops = {
- .elevator_merge_req_fn = row_merged_requests,
- .elevator_dispatch_fn = row_dispatch_requests,
- .elevator_add_req_fn = row_add_request,
- .elevator_reinsert_req_fn = row_reinsert_req,
- .elevator_is_urgent_fn = row_urgent_pending,
- .elevator_completed_req_fn = row_completed_req,
- .elevator_former_req_fn = elv_rb_former_request,
- .elevator_latter_req_fn = elv_rb_latter_request,
- .elevator_set_req_fn = row_set_request,
- .elevator_init_fn = row_init_queue,
- .elevator_exit_fn = row_exit_queue,
- },
- .icq_size = sizeof(struct io_cq),
- .icq_align = __alignof__(struct io_cq),
- .elevator_attrs = row_attrs,
- .elevator_name = "row",
- .elevator_owner = THIS_MODULE,
- };
- static int __init row_init(void)
- {
- elv_register(&iosched_row);
- return 0;
- }
- static void __exit row_exit(void)
- {
- elv_unregister(&iosched_row);
- }
- module_init(row_init);
- module_exit(row_exit);
- MODULE_LICENSE("GPL v2");
- MODULE_DESCRIPTION("Read Over Write IO scheduler");