elevator.c

/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - loose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))
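
/*
 * The hash key is the request's end sector, so looking up a bio's start
 * sector via elv_rqhash_find() yields requests that the bio could be
 * appended to (back merge candidates).
 */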
  49. /*
  50. * Query io scheduler to see if the current process issuing bio may be
  51. * merged with rq.
  52. */
  53. static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
  54. {
  55. struct request_queue *q = rq->q;
  56. struct elevator_queue *e = q->elevator;
  57. if (e->uses_mq && e->type->ops.mq.allow_merge)
  58. return e->type->ops.mq.allow_merge(q, rq, bio);
  59. else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
  60. return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);
  61. return 1;
  62. }
  63. /*
  64. * can we safely merge with this request?
  65. */
  66. bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
  67. {
  68. if (!blk_rq_merge_ok(rq, bio))
  69. return false;
  70. if (!elv_iosched_allow_bio_merge(rq, bio))
  71. return false;
  72. return true;
  73. }
  74. EXPORT_SYMBOL(elv_bio_merge_ok);
  75. /*
  76. * Return scheduler with name 'name' and with matching 'mq capability
  77. */
  78. static struct elevator_type *elevator_find(const char *name, bool mq)
  79. {
  80. struct elevator_type *e;
  81. list_for_each_entry(e, &elv_list, list) {
  82. if (!strcmp(e->elevator_name, name) && (mq == e->uses_mq))
  83. return e;
  84. }
  85. return NULL;
  86. }
  87. static void elevator_put(struct elevator_type *e)
  88. {
  89. module_put(e->elevator_owner);
  90. }
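
/*
 * Look up an elevator type and take a reference on its owning module;
 * the reference is dropped again via elevator_put().  If the scheduler
 * is not present, @try_loading controls whether we attempt to load it
 * as a "<name>-iosched" module.
 */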
static struct elevator_type *elevator_get(struct request_queue *q,
					  const char *name, bool try_loading)
{
	struct elevator_type *e;

	spin_lock(&elv_list_lock);

	e = elevator_find(name, q->mq_ops != NULL);
	if (!e && try_loading) {
		spin_unlock(&elv_list_lock);
		request_module("%s-iosched", name);
		spin_lock(&elv_list_lock);
		e = elevator_find(name, q->mq_ops != NULL);
	}

	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock(&elv_list_lock);
	return e;
}

static char chosen_elevator[ELV_NAME_MAX];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
	 */
	strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);

/* called during boot to load the elevator chosen by the elevator param */
void __init load_default_elevator_module(void)
{
	struct elevator_type *e;

	if (!chosen_elevator[0])
		return;

	/*
	 * Boot parameter is deprecated, we haven't supported that for MQ.
	 * Only look for non-mq schedulers from here.
	 */
	spin_lock(&elv_list_lock);
	e = elevator_find(chosen_elevator, false);
	spin_unlock(&elv_list_lock);

	if (!e)
		request_module("%s-iosched", chosen_elevator);
}

static struct kobj_type elv_ktype;

struct elevator_queue *elevator_alloc(struct request_queue *q,
				      struct elevator_type *e)
{
	struct elevator_queue *eq;

	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
	if (unlikely(!eq))
		return NULL;

	eq->type = e;
	kobject_init(&eq->kobj, &elv_ktype);
	mutex_init(&eq->sysfs_lock);
	hash_init(eq->hash);
	eq->uses_mq = e->uses_mq;

	return eq;
}
EXPORT_SYMBOL(elevator_alloc);

static void elevator_release(struct kobject *kobj)
{
	struct elevator_queue *e;

	e = container_of(kobj, struct elevator_queue, kobj);
	elevator_put(e->type);
	kfree(e);
}
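
/*
 * Pick and initialize an I/O scheduler for @q.  Selection order: an
 * explicit @name, then (for legacy queues) the "elevator=" boot
 * parameter, then mq-deadline for single-queue blk-mq devices ("none"
 * for multi-queue), then CONFIG_DEFAULT_IOSCHED, and finally noop.
 */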
int elevator_init(struct request_queue *q, char *name)
{
	struct elevator_type *e = NULL;
	int err;

	/*
	 * q->sysfs_lock must be held to provide mutual exclusion between
	 * elevator_switch() and here.
	 */
	lockdep_assert_held(&q->sysfs_lock);

	if (unlikely(q->elevator))
		return 0;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	if (name) {
		e = elevator_get(q, name, true);
		if (!e)
			return -EINVAL;
	}

	/*
	 * Use the default elevator specified by config boot param for
	 * non-mq devices, or by config option. Don't try to load modules
	 * as we could be running off async and request_module() isn't
	 * allowed from async.
	 */
	if (!e && !q->mq_ops && *chosen_elevator) {
		e = elevator_get(q, chosen_elevator, false);
		if (!e)
			printk(KERN_ERR "I/O scheduler %s not found\n",
							chosen_elevator);
	}

	if (!e) {
		/*
		 * For blk-mq devices, we default to using mq-deadline,
		 * if available, for single queue devices. If deadline
		 * isn't available OR we have multiple queues, default
		 * to "none".
		 */
		if (q->mq_ops) {
			if (q->nr_hw_queues == 1)
				e = elevator_get(q, "mq-deadline", false);
			if (!e)
				return 0;
		} else
			e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);

		if (!e) {
			printk(KERN_ERR
				"Default I/O scheduler not found. " \
				"Using noop.\n");
			e = elevator_get(q, "noop", false);
		}
	}

	if (e->uses_mq)
		err = blk_mq_init_sched(q, e);
	else
		err = e->ops.sq.elevator_init_fn(q, e);
	if (err)
		elevator_put(e);
	return err;
}
EXPORT_SYMBOL(elevator_init);

void elevator_exit(struct request_queue *q, struct elevator_queue *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->uses_mq && e->type->ops.mq.exit_sched)
		blk_mq_exit_sched(q, e);
	else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
		e->type->ops.sq.elevator_exit_fn(e);
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);

static inline void __elv_rqhash_del(struct request *rq)
{
	hash_del(&rq->hash);
	rq->rq_flags &= ~RQF_HASHED;
}

void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}
EXPORT_SYMBOL_GPL(elv_rqhash_del);

void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
	rq->rq_flags |= RQF_HASHED;
}
EXPORT_SYMBOL_GPL(elv_rqhash_add);

void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}
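
/*
 * Find a hashed request whose end sector equals @offset.  Requests that
 * have become unmergeable are pruned from the hash as we walk it.
 */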
struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
	struct elevator_queue *e = q->elevator;
	struct hlist_node *next;
	struct request *rq;

	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
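
/*
 * The tree is keyed on blk_rq_pos(), i.e. the request's start sector;
 * elv_rb_find() returns an exact start-sector match only.
 */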
void elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
			p = &(*p)->rb_left;
		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
			p = &(*p)->rb_right;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < blk_rq_pos(rq))
			n = n->rb_left;
		else if (sector > blk_rq_pos(rq))
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}
EXPORT_SYMBOL(elv_rb_find);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.
 */
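/*
 * The dispatch list is scanned from the back; the walk stops at started
 * requests, soft barriers, or a change of operation/direction, and
 * q->end_sector is used as the boundary of the one-way elevator so that
 * requests at or beyond the boundary are kept ahead of those that wrap
 * around below it.
 */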
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if (req_op(rq) != req_op(pos))
			break;
		if (rq_data_dir(rq) != rq_data_dir(pos))
			break;
		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
			break;
		if (blk_rq_pos(rq) >= boundary) {
			if (blk_rq_pos(pos) < boundary)
				continue;
		} else {
			if (blk_rq_pos(pos) >= boundary)
				break;
		}
		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
			break;
	}

	list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);
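
/*
 * Decide whether (and how) @bio can be merged with an already queued
 * request.  The one-hit cache (q->last_merge) is tried first, then the
 * hash is consulted for a back merge, and finally the scheduler's own
 * merge hook is given a chance.
 */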
enum elv_merge elv_merge(struct request_queue *q, struct request **req,
		struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct request *__rq;

	/*
	 * Levels of merges:
	 * 	nomerges:  No merges at all attempted
	 * 	noxmerges: Only simple one-hit cache try
	 * 	merges:	   All merge tries attempted
	 */
	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
		return ELEVATOR_NO_MERGE;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
		enum elv_merge ret = blk_try_merge(q->last_merge, bio);

		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	if (blk_queue_noxmerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
	if (__rq && elv_bio_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->uses_mq && e->type->ops.mq.request_merge)
		return e->type->ops.mq.request_merge(q, req, bio);
	else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
		return e->type->ops.sq.elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise
 */
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
{
	struct request *__rq;
	bool ret;

	if (blk_queue_nomerges(q))
		return false;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
		return true;

	if (blk_queue_noxmerges(q))
		return false;

	ret = false;
	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	while (1) {
		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
			break;

		/* The merged request could be merged with others, try again */
		ret = true;
		rq = __rq;
	}

	return ret;
}

void elv_merged_request(struct request_queue *q, struct request *rq,
		enum elv_merge type)
{
	struct elevator_queue *e = q->elevator;

	if (e->uses_mq && e->type->ops.mq.request_merged)
		e->type->ops.mq.request_merged(q, rq, type);
	else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
		e->type->ops.sq.elevator_merged_fn(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
			struct request *next)
{
	struct elevator_queue *e = q->elevator;
	bool next_sorted = false;

	if (e->uses_mq && e->type->ops.mq.requests_merged)
		e->type->ops.mq.requests_merged(q, rq, next);
	else if (e->type->ops.sq.elevator_merge_req_fn) {
		next_sorted = (__force bool)(next->rq_flags & RQF_SORTED);
		if (next_sorted)
			e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
	}

	elv_rqhash_reposition(q, rq);

	if (next_sorted) {
		elv_rqhash_del(q, next);
		q->nr_sorted--;
	}

	q->last_merge = rq;
}

void elv_bio_merged(struct request_queue *q, struct request *rq,
			struct bio *bio)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return;

	if (e->type->ops.sq.elevator_bio_merged_fn)
		e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
}

#ifdef CONFIG_PM
static void blk_pm_requeue_request(struct request *rq)
{
	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
		rq->q->nr_pending--;
}

static void blk_pm_add_request(struct request_queue *q, struct request *rq)
{
	if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
		pm_request_resume(q->dev);
}
#else
static inline void blk_pm_requeue_request(struct request *rq) {}
static inline void blk_pm_add_request(struct request_queue *q,
				      struct request *rq)
{
}
#endif

void elv_requeue_request(struct request_queue *q, struct request *rq)
{
	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight[rq_is_sync(rq)]--;
		if (rq->rq_flags & RQF_SORTED)
			elv_deactivate_rq(q, rq);
	}

	rq->rq_flags &= ~RQF_STARTED;

	blk_pm_requeue_request(rq);

	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
}

void elv_drain_elevator(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;
	static int printed;

	if (WARN_ON_ONCE(e->uses_mq))
		return;

	lockdep_assert_held(q->queue_lock);

	while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
		;
	if (q->nr_sorted && printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->type->elevator_name, q->nr_sorted);
	}
}

void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
	trace_block_rq_insert(q, rq);

	blk_pm_add_request(q, rq);

	rq->q = q;

	if (rq->rq_flags & RQF_SOFTBARRIER) {
		/* barriers are scheduling boundary, update end_sector */
		if (!blk_rq_is_passthrough(rq)) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->rq_flags & RQF_ELVPRIV) &&
		    (where == ELEVATOR_INSERT_SORT ||
		     where == ELEVATOR_INSERT_SORT_MERGE))
		where = ELEVATOR_INSERT_BACK;

	switch (where) {
	case ELEVATOR_INSERT_REQUEUE:
	case ELEVATOR_INSERT_FRONT:
		rq->rq_flags |= RQF_SOFTBARRIER;
		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->rq_flags |= RQF_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		__blk_run_queue(q);
		break;

	case ELEVATOR_INSERT_SORT_MERGE:
		/*
		 * If we succeed in merging this request with one in the
		 * queue already, we are done - rq has now been freed,
		 * so no need to do anything further.
		 */
		if (elv_attempt_insert_merge(q, rq))
			break;
		/* fall through */
	case ELEVATOR_INSERT_SORT:
		BUG_ON(blk_rq_is_passthrough(rq));
		rq->rq_flags |= RQF_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_FLUSH:
		rq->rq_flags |= RQF_SOFTBARRIER;
		blk_insert_flush(rq);
		break;
	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __func__, where);
		BUG();
	}
}
EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(struct request_queue *q, struct request *rq, int where)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(elv_add_request);

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->uses_mq && e->type->ops.mq.next_request)
		return e->type->ops.mq.next_request(q, rq);
	else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
		return e->type->ops.sq.elevator_latter_req_fn(q, rq);

	return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->uses_mq && e->type->ops.mq.former_request)
		return e->type->ops.mq.former_request(q, rq);
	if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
		return e->type->ops.sq.elevator_former_req_fn(q, rq);
	return NULL;
}

int elv_set_request(struct request_queue *q, struct request *rq,
		    struct bio *bio, gfp_t gfp_mask)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return 0;

	if (e->type->ops.sq.elevator_set_req_fn)
		return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
	return 0;
}

void elv_put_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return;

	if (e->type->ops.sq.elevator_put_req_fn)
		e->type->ops.sq.elevator_put_req_fn(rq);
}

int elv_may_queue(struct request_queue *q, unsigned int op)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return 0;

	if (e->type->ops.sq.elevator_may_queue_fn)
		return e->type->ops.sq.elevator_may_queue_fn(q, op);

	return ELV_MQUEUE_MAY;
}

void elv_completed_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (WARN_ON_ONCE(e->uses_mq))
		return;

	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight[rq_is_sync(rq)]--;
		if ((rq->rq_flags & RQF_SORTED) &&
		    e->type->ops.sq.elevator_completed_req_fn)
			e->type->ops.sq.elevator_completed_req_fn(q, rq);
	}
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->show)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->store)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static const struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};

int elv_register_queue(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;
	int error;

	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
	if (!error) {
		struct elv_fs_entry *attr = e->type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
		e->registered = 1;
		if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
			e->type->ops.sq.elevator_registered_fn(q);
	}
	return error;
}
EXPORT_SYMBOL(elv_register_queue);

void elv_unregister_queue(struct request_queue *q)
{
	if (q) {
		struct elevator_queue *e = q->elevator;

		kobject_uevent(&e->kobj, KOBJ_REMOVE);
		kobject_del(&e->kobj);
		e->registered = 0;
		/* Re-enable throttling in case elevator disabled it */
		wbt_enable_default(q);
	}
}
EXPORT_SYMBOL(elv_unregister_queue);
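
/*
 * Registration entry points for I/O scheduler modules.  As a rough,
 * illustrative sketch only (the names below are placeholders, not part
 * of this file), a legacy (non-mq) scheduler typically declares a
 * static elevator_type and registers it from its module init:
 *
 *	static struct elevator_type my_iosched = {
 *		.ops.sq = {
 *			.elevator_merge_fn	= my_merge,
 *			.elevator_add_req_fn	= my_add_request,
 *			.elevator_dispatch_fn	= my_dispatch,
 *			.elevator_init_fn	= my_init_queue,
 *			.elevator_exit_fn	= my_exit_queue,
 *		},
 *		.elevator_name	= "my-iosched",
 *		.elevator_owner	= THIS_MODULE,
 *	};
 *
 *	static int __init my_iosched_init(void)
 *	{
 *		return elv_register(&my_iosched);
 *	}
 *
 * and calls elv_unregister() from its module exit path.
 */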
int elv_register(struct elevator_type *e)
{
	char *def = "";

	/* create icq_cache if requested */
	if (e->icq_size) {
		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
			return -EINVAL;

		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
			 "%s_io_cq", e->elevator_name);
		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
						 e->icq_align, 0, NULL);
		if (!e->icq_cache)
			return -ENOMEM;
	}

	/* register, don't allow duplicate names */
	spin_lock(&elv_list_lock);
	if (elevator_find(e->elevator_name, e->uses_mq)) {
		spin_unlock(&elv_list_lock);
		if (e->icq_cache)
			kmem_cache_destroy(e->icq_cache);
		return -EBUSY;
	}
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	/* print pretty message */
	if (!strcmp(e->elevator_name, chosen_elevator) ||
			(!*chosen_elevator &&
			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
				def = " (default)";

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
								def);
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
	/* unregister */
	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);

	/*
	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
	 * sure all RCU operations are complete before proceeding.
	 */
	if (e->icq_cache) {
		rcu_barrier();
		kmem_cache_destroy(e->icq_cache);
		e->icq_cache = NULL;
	}
}
EXPORT_SYMBOL_GPL(elv_unregister);
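
/*
 * Switch a blk-mq queue to @new_e.  The queue is frozen for the duration,
 * the old scheduler (if any) is unregistered and torn down first, and a
 * NULL @new_e means running with no scheduler ("none").
 */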
static int elevator_switch_mq(struct request_queue *q,
			      struct elevator_type *new_e)
{
	int ret;

	blk_mq_freeze_queue(q);

	if (q->elevator) {
		if (q->elevator->registered)
			elv_unregister_queue(q);
		ioc_clear_queue(q);
		elevator_exit(q, q->elevator);
	}

	ret = blk_mq_init_sched(q, new_e);
	if (ret)
		goto out;

	if (new_e) {
		ret = elv_register_queue(q);
		if (ret) {
			elevator_exit(q, q->elevator);
			goto out;
		}
	}

	if (new_e)
		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
	else
		blk_add_trace_msg(q, "elv switch: none");

out:
	blk_mq_unfreeze_queue(q);
	return ret;
}

/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler, before we have allocated what we
 * need for the new one. this way we have a chance of going back to the old
 * one, if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
	struct elevator_queue *old = q->elevator;
	bool old_registered = false;
	int err;

	if (q->mq_ops)
		return elevator_switch_mq(q, new_e);

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data.
	 * Block layer doesn't call into a quiesced elevator - all requests
	 * are directly put on the dispatch list without elevator data
	 * using INSERT_BACK.  All requests have SOFTBARRIER set and no
	 * merge happens either.
	 */
	if (old) {
		old_registered = old->registered;

		blk_queue_bypass_start(q);

		/* unregister and clear all auxiliary data of the old elevator */
		if (old_registered)
			elv_unregister_queue(q);

		ioc_clear_queue(q);
	}

	/* allocate, init and register new elevator */
	err = new_e->ops.sq.elevator_init_fn(q, new_e);
	if (err)
		goto fail_init;

	err = elv_register_queue(q);
	if (err)
		goto fail_register;

	/* done, kill the old one and finish */
	if (old) {
		elevator_exit(q, old);
		blk_queue_bypass_end(q);
	}

	blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);

	return 0;

fail_register:
	elevator_exit(q, q->elevator);
fail_init:
	/* switch failed, restore and re-register old elevator */
	if (old) {
		q->elevator = old;
		elv_register_queue(q);
		blk_queue_bypass_end(q);
	}

	return err;
}

/*
 * Switch this queue to the given IO scheduler.
 */
static int __elevator_change(struct request_queue *q, const char *name)
{
	char elevator_name[ELV_NAME_MAX];
	struct elevator_type *e;

	/* Make sure queue is not in the middle of being removed */
	if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
		return -ENOENT;

	/*
	 * Special case for mq, turn off scheduling
	 */
	if (q->mq_ops && !strncmp(name, "none", 4))
		return elevator_switch(q, NULL);

	strlcpy(elevator_name, name, sizeof(elevator_name));
	e = elevator_get(q, strstrip(elevator_name), true);
	if (!e)
		return -EINVAL;

	if (q->elevator &&
	    !strcmp(elevator_name, q->elevator->type->elevator_name)) {
		elevator_put(e);
		return 0;
	}

	return elevator_switch(q, e);
}

static inline bool elv_support_iosched(struct request_queue *q)
{
	if (q->mq_ops && q->tag_set && (q->tag_set->flags &
				BLK_MQ_F_NO_SCHED))
		return false;
	return true;
}
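
/*
 * Back the "scheduler" queue sysfs attribute: writing a scheduler name
 * (e.g. "echo mq-deadline > /sys/block/<dev>/queue/scheduler") switches
 * the queue over, and reading it lists the available schedulers with the
 * active one in square brackets.
 */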
ssize_t elv_iosched_store(struct request_queue *q, const char *name,
			  size_t count)
{
	int ret;

	if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q))
		return count;

	ret = __elevator_change(q, name);
	if (!ret)
		return count;

	return ret;
}

ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
	struct elevator_queue *e = q->elevator;
	struct elevator_type *elv = NULL;
	struct elevator_type *__e;
	int len = 0;

	if (!blk_queue_stackable(q))
		return sprintf(name, "none\n");

	if (!q->elevator)
		len += sprintf(name+len, "[none] ");
	else
		elv = e->type;

	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) {
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
			continue;
		}
		if (__e->uses_mq && q->mq_ops && elv_support_iosched(q))
			len += sprintf(name+len, "%s ", __e->elevator_name);
		else if (!__e->uses_mq && !q->mq_ops)
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	if (q->mq_ops && q->elevator)
		len += sprintf(name+len, "none");

	len += sprintf(len+name, "\n");
	return len;
}

struct request *elv_rb_former_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);