bfq-iosched.h 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957
  1. /*
  2. * Header file for the BFQ I/O scheduler: data structures and
  3. * prototypes of interface functions among BFQ components.
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation; either version 2 of the
  8. * License, or (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. */
  15. #ifndef _BFQ_H
  16. #define _BFQ_H
  17. #include <linux/blktrace_api.h>
  18. #include <linux/hrtimer.h>
  19. #include <linux/blk-cgroup.h>
  20. #define BFQ_IOPRIO_CLASSES 3
  21. #define BFQ_CL_IDLE_TIMEOUT (HZ/5)
  22. #define BFQ_MIN_WEIGHT 1
  23. #define BFQ_MAX_WEIGHT 1000
  24. #define BFQ_WEIGHT_CONVERSION_COEFF 10
  25. #define BFQ_DEFAULT_QUEUE_IOPRIO 4
  26. #define BFQ_WEIGHT_LEGACY_DFL 100
  27. #define BFQ_DEFAULT_GRP_IOPRIO 0
  28. #define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
  29. /*
  30. * Soft real-time applications are extremely more latency sensitive
  31. * than interactive ones. Over-raise the weight of the former to
  32. * privilege them against the latter.
  33. */
  34. #define BFQ_SOFTRT_WEIGHT_FACTOR 100
  35. struct bfq_entity;
  36. /**
  37. * struct bfq_service_tree - per ioprio_class service tree.
  38. *
  39. * Each service tree represents a B-WF2Q+ scheduler on its own. Each
  40. * ioprio_class has its own independent scheduler, and so its own
  41. * bfq_service_tree. All the fields are protected by the queue lock
  42. * of the containing bfqd.
  43. */
  44. struct bfq_service_tree {
  45. /* tree for active entities (i.e., those backlogged) */
  46. struct rb_root active;
  47. /* tree for idle entities (i.e., not backlogged, with V < F_i)*/
  48. struct rb_root idle;
  49. /* idle entity with minimum F_i */
  50. struct bfq_entity *first_idle;
  51. /* idle entity with maximum F_i */
  52. struct bfq_entity *last_idle;
  53. /* scheduler virtual time */
  54. u64 vtime;
  55. /* scheduler weight sum; active and idle entities contribute to it */
  56. unsigned long wsum;
  57. };
  58. /**
  59. * struct bfq_sched_data - multi-class scheduler.
  60. *
  61. * bfq_sched_data is the basic scheduler queue. It supports three
  62. * ioprio_classes, and can be used either as a toplevel queue or as an
  63. * intermediate queue in a hierarchical setup.
  64. *
  65. * The supported ioprio_classes are the same as in CFQ, in descending
  66. * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
  67. * Requests from higher priority queues are served before all the
  68. * requests from lower priority queues; among requests of the same
  69. * queue requests are served according to B-WF2Q+.
  70. *
  71. * The schedule is implemented by the service trees, plus the field
  72. * @next_in_service, which points to the entity on the active trees
  73. * that will be served next, if 1) no changes in the schedule occurs
  74. * before the current in-service entity is expired, 2) the in-service
  75. * queue becomes idle when it expires, and 3) if the entity pointed by
  76. * in_service_entity is not a queue, then the in-service child entity
  77. * of the entity pointed by in_service_entity becomes idle on
  78. * expiration. This peculiar definition allows for the following
  79. * optimization, not yet exploited: while a given entity is still in
  80. * service, we already know which is the best candidate for next
  81. * service among the other active entitities in the same parent
  82. * entity. We can then quickly compare the timestamps of the
  83. * in-service entity with those of such best candidate.
  84. *
  85. * All fields are protected by the lock of the containing bfqd.
  86. */
  87. struct bfq_sched_data {
  88. /* entity in service */
  89. struct bfq_entity *in_service_entity;
  90. /* head-of-line entity (see comments above) */
  91. struct bfq_entity *next_in_service;
  92. /* array of service trees, one per ioprio_class */
  93. struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
  94. /* last time CLASS_IDLE was served */
  95. unsigned long bfq_class_idle_last_service;
  96. };
  97. /**
  98. * struct bfq_weight_counter - counter of the number of all active entities
  99. * with a given weight.
  100. */
  101. struct bfq_weight_counter {
  102. unsigned int weight; /* weight of the entities this counter refers to */
  103. unsigned int num_active; /* nr of active entities with this weight */
  104. /*
  105. * Weights tree member (see bfq_data's @queue_weights_tree and
  106. * @group_weights_tree)
  107. */
  108. struct rb_node weights_node;
  109. };
  110. /**
  111. * struct bfq_entity - schedulable entity.
  112. *
  113. * A bfq_entity is used to represent either a bfq_queue (leaf node in the
  114. * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
  115. * entity belongs to the sched_data of the parent group in the cgroup
  116. * hierarchy. Non-leaf entities have also their own sched_data, stored
  117. * in @my_sched_data.
  118. *
  119. * Each entity stores independently its priority values; this would
  120. * allow different weights on different devices, but this
  121. * functionality is not exported to userspace by now. Priorities and
  122. * weights are updated lazily, first storing the new values into the
  123. * new_* fields, then setting the @prio_changed flag. As soon as
  124. * there is a transition in the entity state that allows the priority
  125. * update to take place the effective and the requested priority
  126. * values are synchronized.
  127. *
  128. * Unless cgroups are used, the weight value is calculated from the
  129. * ioprio to export the same interface as CFQ. When dealing with
  130. * ``well-behaved'' queues (i.e., queues that do not spend too much
  131. * time to consume their budget and have true sequential behavior, and
  132. * when there are no external factors breaking anticipation) the
  133. * relative weights at each level of the cgroups hierarchy should be
  134. * guaranteed. All the fields are protected by the queue lock of the
  135. * containing bfqd.
  136. */
  137. struct bfq_entity {
  138. /* service_tree member */
  139. struct rb_node rb_node;
  140. /* pointer to the weight counter associated with this entity */
  141. struct bfq_weight_counter *weight_counter;
  142. /*
  143. * Flag, true if the entity is on a tree (either the active or
  144. * the idle one of its service_tree) or is in service.
  145. */
  146. bool on_st;
  147. /* B-WF2Q+ start and finish timestamps [sectors/weight] */
  148. u64 start, finish;
  149. /* tree the entity is enqueued into; %NULL if not on a tree */
  150. struct rb_root *tree;
  151. /*
  152. * minimum start time of the (active) subtree rooted at this
  153. * entity; used for O(log N) lookups into active trees
  154. */
  155. u64 min_start;
  156. /* amount of service received during the last service slot */
  157. int service;
  158. /* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
  159. int budget;
  160. /* weight of the queue */
  161. int weight;
  162. /* next weight if a change is in progress */
  163. int new_weight;
  164. /* original weight, used to implement weight boosting */
  165. int orig_weight;
  166. /* parent entity, for hierarchical scheduling */
  167. struct bfq_entity *parent;
  168. /*
  169. * For non-leaf nodes in the hierarchy, the associated
  170. * scheduler queue, %NULL on leaf nodes.
  171. */
  172. struct bfq_sched_data *my_sched_data;
  173. /* the scheduler queue this entity belongs to */
  174. struct bfq_sched_data *sched_data;
  175. /* flag, set to request a weight, ioprio or ioprio_class change */
  176. int prio_changed;
  177. };
  178. struct bfq_group;
  179. /**
  180. * struct bfq_ttime - per process thinktime stats.
  181. */
  182. struct bfq_ttime {
  183. /* completion time of the last request */
  184. u64 last_end_request;
  185. /* total process thinktime */
  186. u64 ttime_total;
  187. /* number of thinktime samples */
  188. unsigned long ttime_samples;
  189. /* average process thinktime */
  190. u64 ttime_mean;
  191. };
  192. /**
  193. * struct bfq_queue - leaf schedulable entity.
  194. *
  195. * A bfq_queue is a leaf request queue; it can be associated with an
  196. * io_context or more, if it is async or shared between cooperating
  197. * processes. @cgroup holds a reference to the cgroup, to be sure that it
  198. * does not disappear while a bfqq still references it (mostly to avoid
  199. * races between request issuing and task migration followed by cgroup
  200. * destruction).
  201. * All the fields are protected by the queue lock of the containing bfqd.
  202. */
  203. struct bfq_queue {
  204. /* reference counter */
  205. int ref;
  206. /* parent bfq_data */
  207. struct bfq_data *bfqd;
  208. /* current ioprio and ioprio class */
  209. unsigned short ioprio, ioprio_class;
  210. /* next ioprio and ioprio class if a change is in progress */
  211. unsigned short new_ioprio, new_ioprio_class;
  212. /*
  213. * Shared bfq_queue if queue is cooperating with one or more
  214. * other queues.
  215. */
  216. struct bfq_queue *new_bfqq;
  217. /* request-position tree member (see bfq_group's @rq_pos_tree) */
  218. struct rb_node pos_node;
  219. /* request-position tree root (see bfq_group's @rq_pos_tree) */
  220. struct rb_root *pos_root;
  221. /* sorted list of pending requests */
  222. struct rb_root sort_list;
  223. /* if fifo isn't expired, next request to serve */
  224. struct request *next_rq;
  225. /* number of sync and async requests queued */
  226. int queued[2];
  227. /* number of requests currently allocated */
  228. int allocated;
  229. /* number of pending metadata requests */
  230. int meta_pending;
  231. /* fifo list of requests in sort_list */
  232. struct list_head fifo;
  233. /* entity representing this queue in the scheduler */
  234. struct bfq_entity entity;
  235. /* maximum budget allowed from the feedback mechanism */
  236. int max_budget;
  237. /* budget expiration (in jiffies) */
  238. unsigned long budget_timeout;
  239. /* number of requests on the dispatch list or inside driver */
  240. int dispatched;
  241. /* status flags */
  242. unsigned long flags;
  243. /* node for active/idle bfqq list inside parent bfqd */
  244. struct list_head bfqq_list;
  245. /* associated @bfq_ttime struct */
  246. struct bfq_ttime ttime;
  247. /* bit vector: a 1 for each seeky requests in history */
  248. u32 seek_history;
  249. /* node for the device's burst list */
  250. struct hlist_node burst_list_node;
  251. /* position of the last request enqueued */
  252. sector_t last_request_pos;
  253. /* Number of consecutive pairs of request completion and
  254. * arrival, such that the queue becomes idle after the
  255. * completion, but the next request arrives within an idle
  256. * time slice; used only if the queue's IO_bound flag has been
  257. * cleared.
  258. */
  259. unsigned int requests_within_timer;
  260. /* pid of the process owning the queue, used for logging purposes */
  261. pid_t pid;
  262. /*
  263. * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
  264. * if the queue is shared.
  265. */
  266. struct bfq_io_cq *bic;
  267. /* current maximum weight-raising time for this queue */
  268. unsigned long wr_cur_max_time;
  269. /*
  270. * Minimum time instant such that, only if a new request is
  271. * enqueued after this time instant in an idle @bfq_queue with
  272. * no outstanding requests, then the task associated with the
  273. * queue it is deemed as soft real-time (see the comments on
  274. * the function bfq_bfqq_softrt_next_start())
  275. */
  276. unsigned long soft_rt_next_start;
  277. /*
  278. * Start time of the current weight-raising period if
  279. * the @bfq-queue is being weight-raised, otherwise
  280. * finish time of the last weight-raising period.
  281. */
  282. unsigned long last_wr_start_finish;
  283. /* factor by which the weight of this queue is multiplied */
  284. unsigned int wr_coeff;
  285. /*
  286. * Time of the last transition of the @bfq_queue from idle to
  287. * backlogged.
  288. */
  289. unsigned long last_idle_bklogged;
  290. /*
  291. * Cumulative service received from the @bfq_queue since the
  292. * last transition from idle to backlogged.
  293. */
  294. unsigned long service_from_backlogged;
  295. /*
  296. * Value of wr start time when switching to soft rt
  297. */
  298. unsigned long wr_start_at_switch_to_srt;
  299. unsigned long split_time; /* time of last split */
  300. };
  301. /**
  302. * struct bfq_io_cq - per (request_queue, io_context) structure.
  303. */
  304. struct bfq_io_cq {
  305. /* associated io_cq structure */
  306. struct io_cq icq; /* must be the first member */
  307. /* array of two process queues, the sync and the async */
  308. struct bfq_queue *bfqq[2];
  309. /* per (request_queue, blkcg) ioprio */
  310. int ioprio;
  311. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  312. uint64_t blkcg_serial_nr; /* the current blkcg serial */
  313. #endif
  314. /*
  315. * Snapshot of the has_short_time flag before merging; taken
  316. * to remember its value while the queue is merged, so as to
  317. * be able to restore it in case of split.
  318. */
  319. bool saved_has_short_ttime;
  320. /*
  321. * Same purpose as the previous two fields for the I/O bound
  322. * classification of a queue.
  323. */
  324. bool saved_IO_bound;
  325. /*
  326. * Same purpose as the previous fields for the value of the
  327. * field keeping the queue's belonging to a large burst
  328. */
  329. bool saved_in_large_burst;
  330. /*
  331. * True if the queue belonged to a burst list before its merge
  332. * with another cooperating queue.
  333. */
  334. bool was_in_burst_list;
  335. /*
  336. * Similar to previous fields: save wr information.
  337. */
  338. unsigned long saved_wr_coeff;
  339. unsigned long saved_last_wr_start_finish;
  340. unsigned long saved_wr_start_at_switch_to_srt;
  341. unsigned int saved_wr_cur_max_time;
  342. struct bfq_ttime saved_ttime;
  343. };
  344. enum bfq_device_speed {
  345. BFQ_BFQD_FAST,
  346. BFQ_BFQD_SLOW,
  347. };
  348. /**
  349. * struct bfq_data - per-device data structure.
  350. *
  351. * All the fields are protected by @lock.
  352. */
  353. struct bfq_data {
  354. /* device request queue */
  355. struct request_queue *queue;
  356. /* dispatch queue */
  357. struct list_head dispatch;
  358. /* root bfq_group for the device */
  359. struct bfq_group *root_group;
  360. /*
  361. * rbtree of weight counters of @bfq_queues, sorted by
  362. * weight. Used to keep track of whether all @bfq_queues have
  363. * the same weight. The tree contains one counter for each
  364. * distinct weight associated to some active and not
  365. * weight-raised @bfq_queue (see the comments to the functions
  366. * bfq_weights_tree_[add|remove] for further details).
  367. */
  368. struct rb_root queue_weights_tree;
  369. /*
  370. * rbtree of non-queue @bfq_entity weight counters, sorted by
  371. * weight. Used to keep track of whether all @bfq_groups have
  372. * the same weight. The tree contains one counter for each
  373. * distinct weight associated to some active @bfq_group (see
  374. * the comments to the functions bfq_weights_tree_[add|remove]
  375. * for further details).
  376. */
  377. struct rb_root group_weights_tree;
  378. /*
  379. * Number of bfq_queues containing requests (including the
  380. * queue in service, even if it is idling).
  381. */
  382. int busy_queues;
  383. /* number of weight-raised busy @bfq_queues */
  384. int wr_busy_queues;
  385. /* number of queued requests */
  386. int queued;
  387. /* number of requests dispatched and waiting for completion */
  388. int rq_in_driver;
  389. /*
  390. * Maximum number of requests in driver in the last
  391. * @hw_tag_samples completed requests.
  392. */
  393. int max_rq_in_driver;
  394. /* number of samples used to calculate hw_tag */
  395. int hw_tag_samples;
  396. /* flag set to one if the driver is showing a queueing behavior */
  397. int hw_tag;
  398. /* number of budgets assigned */
  399. int budgets_assigned;
  400. /*
  401. * Timer set when idling (waiting) for the next request from
  402. * the queue in service.
  403. */
  404. struct hrtimer idle_slice_timer;
  405. /* bfq_queue in service */
  406. struct bfq_queue *in_service_queue;
  407. /* on-disk position of the last served request */
  408. sector_t last_position;
  409. /* time of last request completion (ns) */
  410. u64 last_completion;
  411. /* time of first rq dispatch in current observation interval (ns) */
  412. u64 first_dispatch;
  413. /* time of last rq dispatch in current observation interval (ns) */
  414. u64 last_dispatch;
  415. /* beginning of the last budget */
  416. ktime_t last_budget_start;
  417. /* beginning of the last idle slice */
  418. ktime_t last_idling_start;
  419. /* number of samples in current observation interval */
  420. int peak_rate_samples;
  421. /* num of samples of seq dispatches in current observation interval */
  422. u32 sequential_samples;
  423. /* total num of sectors transferred in current observation interval */
  424. u64 tot_sectors_dispatched;
  425. /* max rq size seen during current observation interval (sectors) */
  426. u32 last_rq_max_size;
  427. /* time elapsed from first dispatch in current observ. interval (us) */
  428. u64 delta_from_first;
  429. /*
  430. * Current estimate of the device peak rate, measured in
  431. * [BFQ_RATE_SHIFT * sectors/usec]. The left-shift by
  432. * BFQ_RATE_SHIFT is performed to increase precision in
  433. * fixed-point calculations.
  434. */
  435. u32 peak_rate;
  436. /* maximum budget allotted to a bfq_queue before rescheduling */
  437. int bfq_max_budget;
  438. /* list of all the bfq_queues active on the device */
  439. struct list_head active_list;
  440. /* list of all the bfq_queues idle on the device */
  441. struct list_head idle_list;
  442. /*
  443. * Timeout for async/sync requests; when it fires, requests
  444. * are served in fifo order.
  445. */
  446. u64 bfq_fifo_expire[2];
  447. /* weight of backward seeks wrt forward ones */
  448. unsigned int bfq_back_penalty;
  449. /* maximum allowed backward seek */
  450. unsigned int bfq_back_max;
  451. /* maximum idling time */
  452. u32 bfq_slice_idle;
  453. /* user-configured max budget value (0 for auto-tuning) */
  454. int bfq_user_max_budget;
  455. /*
  456. * Timeout for bfq_queues to consume their budget; used to
  457. * prevent seeky queues from imposing long latencies to
  458. * sequential or quasi-sequential ones (this also implies that
  459. * seeky queues cannot receive guarantees in the service
  460. * domain; after a timeout they are charged for the time they
  461. * have been in service, to preserve fairness among them, but
  462. * without service-domain guarantees).
  463. */
  464. unsigned int bfq_timeout;
  465. /*
  466. * Number of consecutive requests that must be issued within
  467. * the idle time slice to set again idling to a queue which
  468. * was marked as non-I/O-bound (see the definition of the
  469. * IO_bound flag for further details).
  470. */
  471. unsigned int bfq_requests_within_timer;
  472. /*
  473. * Force device idling whenever needed to provide accurate
  474. * service guarantees, without caring about throughput
  475. * issues. CAVEAT: this may even increase latencies, in case
  476. * of useless idling for processes that did stop doing I/O.
  477. */
  478. bool strict_guarantees;
  479. /*
  480. * Last time at which a queue entered the current burst of
  481. * queues being activated shortly after each other; for more
  482. * details about this and the following parameters related to
  483. * a burst of activations, see the comments on the function
  484. * bfq_handle_burst.
  485. */
  486. unsigned long last_ins_in_burst;
  487. /*
  488. * Reference time interval used to decide whether a queue has
  489. * been activated shortly after @last_ins_in_burst.
  490. */
  491. unsigned long bfq_burst_interval;
  492. /* number of queues in the current burst of queue activations */
  493. int burst_size;
  494. /* common parent entity for the queues in the burst */
  495. struct bfq_entity *burst_parent_entity;
  496. /* Maximum burst size above which the current queue-activation
  497. * burst is deemed as 'large'.
  498. */
  499. unsigned long bfq_large_burst_thresh;
  500. /* true if a large queue-activation burst is in progress */
  501. bool large_burst;
  502. /*
  503. * Head of the burst list (as for the above fields, more
  504. * details in the comments on the function bfq_handle_burst).
  505. */
  506. struct hlist_head burst_list;
  507. /* if set to true, low-latency heuristics are enabled */
  508. bool low_latency;
  509. /*
  510. * Maximum factor by which the weight of a weight-raised queue
  511. * is multiplied.
  512. */
  513. unsigned int bfq_wr_coeff;
  514. /* maximum duration of a weight-raising period (jiffies) */
  515. unsigned int bfq_wr_max_time;
  516. /* Maximum weight-raising duration for soft real-time processes */
  517. unsigned int bfq_wr_rt_max_time;
  518. /*
  519. * Minimum idle period after which weight-raising may be
  520. * reactivated for a queue (in jiffies).
  521. */
  522. unsigned int bfq_wr_min_idle_time;
  523. /*
  524. * Minimum period between request arrivals after which
  525. * weight-raising may be reactivated for an already busy async
  526. * queue (in jiffies).
  527. */
  528. unsigned long bfq_wr_min_inter_arr_async;
  529. /* Max service-rate for a soft real-time queue, in sectors/sec */
  530. unsigned int bfq_wr_max_softrt_rate;
  531. /*
  532. * Cached value of the product R*T, used for computing the
  533. * maximum duration of weight raising automatically.
  534. */
  535. u64 RT_prod;
  536. /* device-speed class for the low-latency heuristic */
  537. enum bfq_device_speed device_speed;
  538. /* fallback dummy bfqq for extreme OOM conditions */
  539. struct bfq_queue oom_bfqq;
  540. spinlock_t lock;
  541. /*
  542. * bic associated with the task issuing current bio for
  543. * merging. This and the next field are used as a support to
  544. * be able to perform the bic lookup, needed by bio-merge
  545. * functions, before the scheduler lock is taken, and thus
  546. * avoid taking the request-queue lock while the scheduler
  547. * lock is being held.
  548. */
  549. struct bfq_io_cq *bio_bic;
  550. /* bfqq associated with the task issuing current bio for merging */
  551. struct bfq_queue *bio_bfqq;
  552. };
  553. enum bfqq_state_flags {
  554. BFQQF_just_created = 0, /* queue just allocated */
  555. BFQQF_busy, /* has requests or is in service */
  556. BFQQF_wait_request, /* waiting for a request */
  557. BFQQF_non_blocking_wait_rq, /*
  558. * waiting for a request
  559. * without idling the device
  560. */
  561. BFQQF_fifo_expire, /* FIFO checked in this slice */
  562. BFQQF_has_short_ttime, /* queue has a short think time */
  563. BFQQF_sync, /* synchronous queue */
  564. BFQQF_IO_bound, /*
  565. * bfqq has timed-out at least once
  566. * having consumed at most 2/10 of
  567. * its budget
  568. */
  569. BFQQF_in_large_burst, /*
  570. * bfqq activated in a large burst,
  571. * see comments to bfq_handle_burst.
  572. */
  573. BFQQF_softrt_update, /*
  574. * may need softrt-next-start
  575. * update
  576. */
  577. BFQQF_coop, /* bfqq is shared */
  578. BFQQF_split_coop /* shared bfqq will be split */
  579. };
  580. #define BFQ_BFQQ_FNS(name) \
  581. void bfq_mark_bfqq_##name(struct bfq_queue *bfqq); \
  582. void bfq_clear_bfqq_##name(struct bfq_queue *bfqq); \
  583. int bfq_bfqq_##name(const struct bfq_queue *bfqq);
  584. BFQ_BFQQ_FNS(just_created);
  585. BFQ_BFQQ_FNS(busy);
  586. BFQ_BFQQ_FNS(wait_request);
  587. BFQ_BFQQ_FNS(non_blocking_wait_rq);
  588. BFQ_BFQQ_FNS(fifo_expire);
  589. BFQ_BFQQ_FNS(has_short_ttime);
  590. BFQ_BFQQ_FNS(sync);
  591. BFQ_BFQQ_FNS(IO_bound);
  592. BFQ_BFQQ_FNS(in_large_burst);
  593. BFQ_BFQQ_FNS(coop);
  594. BFQ_BFQQ_FNS(split_coop);
  595. BFQ_BFQQ_FNS(softrt_update);
  596. #undef BFQ_BFQQ_FNS
  597. /* Expiration reasons. */
  598. enum bfqq_expiration {
  599. BFQQE_TOO_IDLE = 0, /*
  600. * queue has been idling for
  601. * too long
  602. */
  603. BFQQE_BUDGET_TIMEOUT, /* budget took too long to be used */
  604. BFQQE_BUDGET_EXHAUSTED, /* budget consumed */
  605. BFQQE_NO_MORE_REQUESTS, /* the queue has no more requests */
  606. BFQQE_PREEMPTED /* preemption in progress */
  607. };
  608. struct bfqg_stats {
  609. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  610. /* number of ios merged */
  611. struct blkg_rwstat merged;
  612. /* total time spent on device in ns, may not be accurate w/ queueing */
  613. struct blkg_rwstat service_time;
  614. /* total time spent waiting in scheduler queue in ns */
  615. struct blkg_rwstat wait_time;
  616. /* number of IOs queued up */
  617. struct blkg_rwstat queued;
  618. /* total disk time and nr sectors dispatched by this group */
  619. struct blkg_stat time;
  620. /* sum of number of ios queued across all samples */
  621. struct blkg_stat avg_queue_size_sum;
  622. /* count of samples taken for average */
  623. struct blkg_stat avg_queue_size_samples;
  624. /* how many times this group has been removed from service tree */
  625. struct blkg_stat dequeue;
  626. /* total time spent waiting for it to be assigned a timeslice. */
  627. struct blkg_stat group_wait_time;
  628. /* time spent idling for this blkcg_gq */
  629. struct blkg_stat idle_time;
  630. /* total time with empty current active q with other requests queued */
  631. struct blkg_stat empty_time;
  632. /* fields after this shouldn't be cleared on stat reset */
  633. uint64_t start_group_wait_time;
  634. uint64_t start_idle_time;
  635. uint64_t start_empty_time;
  636. uint16_t flags;
  637. #endif /* CONFIG_BFQ_GROUP_IOSCHED */
  638. };
  639. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  640. /*
  641. * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
  642. *
  643. * @ps: @blkcg_policy_storage that this structure inherits
  644. * @weight: weight of the bfq_group
  645. */
  646. struct bfq_group_data {
  647. /* must be the first member */
  648. struct blkcg_policy_data pd;
  649. unsigned int weight;
  650. };
  651. /**
  652. * struct bfq_group - per (device, cgroup) data structure.
  653. * @entity: schedulable entity to insert into the parent group sched_data.
  654. * @sched_data: own sched_data, to contain child entities (they may be
  655. * both bfq_queues and bfq_groups).
  656. * @bfqd: the bfq_data for the device this group acts upon.
  657. * @async_bfqq: array of async queues for all the tasks belonging to
  658. * the group, one queue per ioprio value per ioprio_class,
  659. * except for the idle class that has only one queue.
  660. * @async_idle_bfqq: async queue for the idle class (ioprio is ignored).
  661. * @my_entity: pointer to @entity, %NULL for the toplevel group; used
  662. * to avoid too many special cases during group creation/
  663. * migration.
  664. * @stats: stats for this bfqg.
  665. * @active_entities: number of active entities belonging to the group;
  666. * unused for the root group. Used to know whether there
  667. * are groups with more than one active @bfq_entity
  668. * (see the comments to the function
  669. * bfq_bfqq_may_idle()).
  670. * @rq_pos_tree: rbtree sorted by next_request position, used when
  671. * determining if two or more queues have interleaving
  672. * requests (see bfq_find_close_cooperator()).
  673. *
  674. * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
  675. * there is a set of bfq_groups, each one collecting the lower-level
  676. * entities belonging to the group that are acting on the same device.
  677. *
  678. * Locking works as follows:
  679. * o @bfqd is protected by the queue lock, RCU is used to access it
  680. * from the readers.
  681. * o All the other fields are protected by the @bfqd queue lock.
  682. */
  683. struct bfq_group {
  684. /* must be the first member */
  685. struct blkg_policy_data pd;
  686. /* cached path for this blkg (see comments in bfq_bic_update_cgroup) */
  687. char blkg_path[128];
  688. /* reference counter (see comments in bfq_bic_update_cgroup) */
  689. int ref;
  690. struct bfq_entity entity;
  691. struct bfq_sched_data sched_data;
  692. void *bfqd;
  693. struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
  694. struct bfq_queue *async_idle_bfqq;
  695. struct bfq_entity *my_entity;
  696. int active_entities;
  697. struct rb_root rq_pos_tree;
  698. struct bfqg_stats stats;
  699. };
  700. #else
  701. struct bfq_group {
  702. struct bfq_sched_data sched_data;
  703. struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
  704. struct bfq_queue *async_idle_bfqq;
  705. struct rb_root rq_pos_tree;
  706. };
  707. #endif
  708. struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
  709. /* --------------- main algorithm interface ----------------- */
  710. #define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \
  711. { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 })
  712. extern const int bfq_timeout;
  713. struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
  714. void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
  715. struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
  716. void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
  717. void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
  718. struct rb_root *root);
  719. void bfq_weights_tree_remove(struct bfq_data *bfqd, struct bfq_entity *entity,
  720. struct rb_root *root);
  721. void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  722. bool compensate, enum bfqq_expiration reason);
  723. void bfq_put_queue(struct bfq_queue *bfqq);
  724. void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
  725. void bfq_schedule_dispatch(struct bfq_data *bfqd);
  726. void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
  727. /* ------------ end of main algorithm interface -------------- */
  728. /* ---------------- cgroups-support interface ---------------- */
  729. void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
  730. unsigned int op);
  731. void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
  732. void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
  733. void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
  734. uint64_t io_start_time, unsigned int op);
  735. void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
  736. void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
  737. void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
  738. void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg);
  739. void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg);
  740. void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  741. struct bfq_group *bfqg);
  742. void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg);
  743. void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio);
  744. void bfq_end_wr_async(struct bfq_data *bfqd);
  745. struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
  746. struct blkcg *blkcg);
  747. struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
  748. struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
  749. struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
  750. void bfqg_and_blkg_put(struct bfq_group *bfqg);
  751. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  752. extern struct cftype bfq_blkcg_legacy_files[];
  753. extern struct cftype bfq_blkg_files[];
  754. extern struct blkcg_policy blkcg_policy_bfq;
  755. #endif
  756. /* ------------- end of cgroups-support interface ------------- */
  757. /* - interface of the internal hierarchical B-WF2Q+ scheduler - */
  758. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  759. /* both next loops stop at one of the child entities of the root group */
  760. #define for_each_entity(entity) \
  761. for (; entity ; entity = entity->parent)
  762. /*
  763. * For each iteration, compute parent in advance, so as to be safe if
  764. * entity is deallocated during the iteration. Such a deallocation may
  765. * happen as a consequence of a bfq_put_queue that frees the bfq_queue
  766. * containing entity.
  767. */
  768. #define for_each_entity_safe(entity, parent) \
  769. for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
  770. #else /* CONFIG_BFQ_GROUP_IOSCHED */
  771. /*
  772. * Next two macros are fake loops when cgroups support is not
  773. * enabled. I fact, in such a case, there is only one level to go up
  774. * (to reach the root group).
  775. */
  776. #define for_each_entity(entity) \
  777. for (; entity ; entity = NULL)
  778. #define for_each_entity_safe(entity, parent) \
  779. for (parent = NULL; entity ; entity = parent)
  780. #endif /* CONFIG_BFQ_GROUP_IOSCHED */
  781. struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq);
  782. struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
  783. struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity);
  784. struct bfq_entity *bfq_entity_of(struct rb_node *node);
  785. unsigned short bfq_ioprio_to_weight(int ioprio);
  786. void bfq_put_idle_entity(struct bfq_service_tree *st,
  787. struct bfq_entity *entity);
  788. struct bfq_service_tree *
  789. __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
  790. struct bfq_entity *entity,
  791. bool update_class_too);
  792. void bfq_bfqq_served(struct bfq_queue *bfqq, int served);
  793. void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  794. unsigned long time_ms);
  795. bool __bfq_deactivate_entity(struct bfq_entity *entity,
  796. bool ins_into_idle_tree);
  797. bool next_queue_may_preempt(struct bfq_data *bfqd);
  798. struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
  799. void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
  800. void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  801. bool ins_into_idle_tree, bool expiration);
  802. void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
  803. void bfq_requeue_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  804. bool expiration);
  805. void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  806. bool expiration);
  807. void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
  808. /* --------------- end of interface of B-WF2Q+ ---------------- */
  809. /* Logging facilities. */
  810. #ifdef CONFIG_BFQ_GROUP_IOSCHED
  811. struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
  812. #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
  813. blk_add_cgroup_trace_msg((bfqd)->queue, \
  814. bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
  815. "bfq%d%c " fmt, (bfqq)->pid, \
  816. bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \
  817. } while (0)
  818. #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
  819. blk_add_cgroup_trace_msg((bfqd)->queue, \
  820. bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \
  821. } while (0)
  822. #else /* CONFIG_BFQ_GROUP_IOSCHED */
  823. #define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \
  824. blk_add_trace_msg((bfqd)->queue, "bfq%d%c " fmt, (bfqq)->pid, \
  825. bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
  826. ##args)
  827. #define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0)
  828. #endif /* CONFIG_BFQ_GROUP_IOSCHED */
  829. #define bfq_log(bfqd, fmt, args...) \
  830. blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)
  831. #endif /* _BFQ_H */