/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@kernel.dk> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - loose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-cgroup.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
/*
 * Merge hash stuff.
 */
#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_allow_merge_fn)
		return e->type->ops.elevator_allow_merge_fn(q, rq, bio);

	return 1;
}
/*
 * can we safely merge with this request?
 */
bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq))
		return 0;

	/*
	 * Don't merge file system requests and discard requests
	 */
	if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
		return 0;

	/*
	 * Don't merge discard requests and secure discard requests
	 */
	if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
		return 0;

	/*
	 * Don't merge sanitize requests
	 */
	if ((bio->bi_rw & REQ_SANITIZE) != (rq->bio->bi_rw & REQ_SANITIZE))
		return 0;

	/*
	 * different data direction or already started, don't merge
	 */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return 0;

	/*
	 * must be same device and not a special request
	 */
	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
		return 0;

	/*
	 * only merge integrity protected bio into ditto rq
	 */
	if (bio_integrity(bio) != blk_integrity_rq(rq))
		return 0;

	if (!elv_iosched_allow_merge(rq, bio))
		return 0;

	return 1;
}
EXPORT_SYMBOL(elv_rq_merge_ok);
static struct elevator_type *elevator_find(const char *name)
{
	struct elevator_type *e;

	list_for_each_entry(e, &elv_list, list) {
		if (!strcmp(e->elevator_name, name))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}
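
/*
 * Look up an elevator type by name, trying to load the matching
 * "<name>-iosched" module if it is not yet registered. On success a
 * reference on the elevator's module is held; callers drop it again
 * with elevator_put().
 */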
static struct elevator_type *elevator_get(const char *name)
{
	struct elevator_type *e;

	spin_lock(&elv_list_lock);

	e = elevator_find(name);
	if (!e) {
		spin_unlock(&elv_list_lock);
		request_module("%s-iosched", name);
		spin_lock(&elv_list_lock);
		e = elevator_find(name);
	}

	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock(&elv_list_lock);

	return e;
}
static char chosen_elevator[ELV_NAME_MAX];

static int __init elevator_setup(char *str)
{
	/*
	 * Be backwards-compatible with previous kernels, so users
	 * won't get the wrong elevator.
	 */
	strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
	return 1;
}

__setup("elevator=", elevator_setup);
static struct kobj_type elv_ktype;

static struct elevator_queue *elevator_alloc(struct request_queue *q,
				  struct elevator_type *e)
{
	struct elevator_queue *eq;

	eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
	if (unlikely(!eq))
		goto err;

	eq->type = e;
	kobject_init(&eq->kobj, &elv_ktype);
	mutex_init(&eq->sysfs_lock);
	hash_init(eq->hash);

	return eq;
err:
	kfree(eq);
	elevator_put(e);
	return NULL;
}

static void elevator_release(struct kobject *kobj)
{
	struct elevator_queue *e;

	e = container_of(kobj, struct elevator_queue, kobj);
	elevator_put(e->type);
	kfree(e);
}
int elevator_init(struct request_queue *q, char *name)
{
	struct elevator_type *e = NULL;
	int err;

	if (unlikely(q->elevator))
		return 0;

	INIT_LIST_HEAD(&q->queue_head);
	q->last_merge = NULL;
	q->end_sector = 0;
	q->boundary_rq = NULL;

	if (name) {
		e = elevator_get(name);
		if (!e)
			return -EINVAL;
	}

	if (!e && *chosen_elevator) {
		e = elevator_get(chosen_elevator);
		if (!e)
			printk(KERN_ERR "I/O scheduler %s not found\n",
							chosen_elevator);
	}

	if (!e) {
		e = elevator_get(CONFIG_DEFAULT_IOSCHED);
		if (!e) {
			printk(KERN_ERR
				"Default I/O scheduler not found. " \
					"Using noop.\n");
			e = elevator_get("noop");
		}
	}

	q->elevator = elevator_alloc(q, e);
	if (!q->elevator)
		return -ENOMEM;

	err = e->ops.elevator_init_fn(q);
	if (err) {
		kobject_put(&q->elevator->kobj);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL(elevator_init);

void elevator_exit(struct elevator_queue *e)
{
	mutex_lock(&e->sysfs_lock);
	if (e->type->ops.elevator_exit_fn)
		e->type->ops.elevator_exit_fn(e);
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);
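
/*
 * Merge hash helpers: mergeable requests are hashed on the sector
 * immediately following their last sector (rq_hash_key()), so that
 * elv_merge() can find a back-merge candidate for a bio with a single
 * hash lookup on bio->bi_sector.
 */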
static inline void __elv_rqhash_del(struct request *rq)
{
	hash_del(&rq->hash);
}

static void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}

static void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
}

static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}

static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
	struct elevator_queue *e = q->elevator;
	struct hlist_node *entry, *next;
	struct request *rq;

	hash_for_each_possible_safe(e->hash, rq, entry, next, hash, offset) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}
/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
void elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
			p = &(*p)->rb_left;
		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
			p = &(*p)->rb_right;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < blk_rq_pos(rq))
			n = n->rb_left;
		else if (sector > blk_rq_pos(rq))
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}
EXPORT_SYMBOL(elv_rb_find);
/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sorted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(struct request_queue *q, struct request *rq)
{
	sector_t boundary;
	struct list_head *entry;
	int stop_flags;

	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	boundary = q->end_sector;
	stop_flags = REQ_SOFTBARRIER | REQ_STARTED;
	list_for_each_prev(entry, &q->queue_head) {
		struct request *pos = list_entry_rq(entry);

		if ((rq->cmd_flags & REQ_DISCARD) !=
		    (pos->cmd_flags & REQ_DISCARD))
			break;
		if (rq_data_dir(rq) != rq_data_dir(pos))
			break;
		if (pos->cmd_flags & stop_flags)
			break;
		if (blk_rq_pos(rq) >= boundary) {
			if (blk_rq_pos(pos) < boundary)
				continue;
		} else {
			if (blk_rq_pos(pos) >= boundary)
				break;
		}
		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
			break;
	}

	list_add(&rq->queuelist, entry);
}
EXPORT_SYMBOL(elv_dispatch_sort);

/*
 * Insert rq into dispatch queue of q.  Queue lock must be held on
 * entry.  rq is added to the back of the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
{
	if (q->last_merge == rq)
		q->last_merge = NULL;

	elv_rqhash_del(q, rq);

	q->nr_sorted--;

	q->end_sector = rq_end_sector(rq);
	q->boundary_rq = rq;
	list_add_tail(&rq->queuelist, &q->queue_head);
}
EXPORT_SYMBOL(elv_dispatch_add_tail);
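
/*
 * Find a request already in the queue that @bio may be merged into.
 * Tries the one-hit last_merge cache first, then a hash lookup for a
 * back merge, and finally asks the io scheduler. On success *req is
 * set and the merge direction is returned, ELEVATOR_NO_MERGE otherwise.
 */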
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct request *__rq;
	int ret;

	/*
	 * Levels of merges:
	 *	nomerges:  No merges at all attempted
	 *	noxmerges: Only simple one-hit cache try
	 *	merges:	   All merge tries attempted
	 */
	if (blk_queue_nomerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
		ret = blk_try_merge(q->last_merge, bio);
		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	if (blk_queue_noxmerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_sector);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		*req = __rq;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->type->ops.elevator_merge_fn)
		return e->type->ops.elevator_merge_fn(q, req, bio);

	return ELEVATOR_NO_MERGE;
}
/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise
 */
static bool elv_attempt_insert_merge(struct request_queue *q,
				     struct request *rq)
{
	struct request *__rq;
	bool ret;

	if (blk_queue_nomerges(q))
		return false;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
		return true;

	if (blk_queue_noxmerges(q))
		return false;

	ret = false;
	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	while (1) {
		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
			break;

		/* The merged request could be merged with others, try again */
		ret = true;
		rq = __rq;
	}

	return ret;
}
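
/*
 * Called after @bio has been merged into @rq so that the io scheduler
 * and the merge hash can be updated. A back merge grows the request's
 * end sector, so its hash position must be recomputed.
 */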
void elv_merged_request(struct request_queue *q, struct request *rq, int type)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_merged_fn)
		e->type->ops.elevator_merged_fn(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
			     struct request *next)
{
	struct elevator_queue *e = q->elevator;
	const int next_sorted = next->cmd_flags & REQ_SORTED;

	if (next_sorted && e->type->ops.elevator_merge_req_fn)
		e->type->ops.elevator_merge_req_fn(q, rq, next);

	elv_rqhash_reposition(q, rq);

	if (next_sorted) {
		elv_rqhash_del(q, next);
		q->nr_sorted--;
	}

	q->last_merge = rq;
}

void elv_bio_merged(struct request_queue *q, struct request *rq,
			struct bio *bio)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_bio_merged_fn)
		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
}
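
/*
 * Runtime-PM accounting: every request that is not itself a power
 * management request (REQ_PM) counts towards q->nr_pending. Adding the
 * first such request to a suspended or suspending queue triggers an
 * asynchronous resume of the underlying device.
 */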
#ifdef CONFIG_PM_RUNTIME
static void blk_pm_requeue_request(struct request *rq)
{
	if (rq->q->dev && !(rq->cmd_flags & REQ_PM))
		rq->q->nr_pending--;
}

static void blk_pm_add_request(struct request_queue *q, struct request *rq)
{
	if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 &&
	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
		pm_request_resume(q->dev);
}
#else
static inline void blk_pm_requeue_request(struct request *rq) {}
static inline void blk_pm_add_request(struct request_queue *q,
				      struct request *rq)
{
}
#endif
void elv_requeue_request(struct request_queue *q, struct request *rq)
{
	/*
	 * it already went through dequeue, we need to decrement the
	 * in_flight count again
	 */
	if (blk_account_rq(rq)) {
		q->in_flight[rq_is_sync(rq)]--;
		if (rq->cmd_flags & REQ_SORTED)
			elv_deactivate_rq(q, rq);
	}

	rq->cmd_flags &= ~REQ_STARTED;

	blk_pm_requeue_request(rq);

	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
}
/**
 * elv_reinsert_request() - Insert a request back to the scheduler
 * @q:	request queue where the request should be inserted
 * @rq:	request to be inserted
 *
 * This function returns the request back to the scheduler to be
 * inserted as if it had never been dispatched.
 *
 * Return: 0 on success, error code on failure
 */
int elv_reinsert_request(struct request_queue *q, struct request *rq)
{
	int res;

	if (!q->elevator->type->ops.elevator_reinsert_req_fn)
		return -EPERM;

	res = q->elevator->type->ops.elevator_reinsert_req_fn(q, rq);
	if (!res) {
		/*
		 * it already went through dequeue, we need to decrement the
		 * in_flight count again
		 */
		if (blk_account_rq(rq)) {
			q->in_flight[rq_is_sync(rq)]--;
			if (rq->cmd_flags & REQ_SORTED)
				elv_deactivate_rq(q, rq);
		}
		rq->cmd_flags &= ~REQ_STARTED;
		q->nr_sorted++;
	}

	return res;
}
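
/*
 * Force the io scheduler to dispatch everything it is still holding
 * onto q->queue_head, and complain if it claims to still have sorted
 * requests afterwards. Caller must hold the queue lock.
 */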
void elv_drain_elevator(struct request_queue *q)
{
	static int printed;

	lockdep_assert_held(q->queue_lock);

	while (q->elevator->type->ops.elevator_dispatch_fn(q, 1))
		;
	if (q->nr_sorted && printed++ < 10) {
		printk(KERN_ERR "%s: forced dispatching is broken "
		       "(nr_sorted=%u), please report this\n",
		       q->elevator->type->elevator_name, q->nr_sorted);
	}
}
void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
	trace_block_rq_insert(q, rq);

	blk_pm_add_request(q, rq);

	rq->q = q;

	if (rq->cmd_flags & REQ_SOFTBARRIER) {
		/* barriers are scheduling boundary, update end_sector */
		if (rq->cmd_type == REQ_TYPE_FS ||
		    (rq->cmd_flags & (REQ_DISCARD | REQ_SANITIZE))) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = rq;
		}
	} else if (!(rq->cmd_flags & REQ_ELVPRIV) &&
		    (where == ELEVATOR_INSERT_SORT ||
		     where == ELEVATOR_INSERT_SORT_MERGE))
		where = ELEVATOR_INSERT_BACK;

	switch (where) {
	case ELEVATOR_INSERT_REQUEUE:
	case ELEVATOR_INSERT_FRONT:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		list_add(&rq->queuelist, &q->queue_head);
		break;

	case ELEVATOR_INSERT_BACK:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		/*
		 * We kick the queue here for the following reasons.
		 * - The elevator might have returned NULL previously
		 *   to delay requests and returned them now.  As the
		 *   queue wasn't empty before this request, ll_rw_blk
		 *   won't run the queue on return, resulting in hang.
		 * - Usually, back inserted requests won't be merged
		 *   with anything.  There's no point in delaying queue
		 *   processing.
		 */
		__blk_run_queue(q);
		break;

	case ELEVATOR_INSERT_SORT_MERGE:
		/*
		 * If we succeed in merging this request with one in the
		 * queue already, we are done - rq has now been freed,
		 * so no need to do anything further.
		 */
		if (elv_attempt_insert_merge(q, rq))
			break;
	case ELEVATOR_INSERT_SORT:
		BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
		       !(rq->cmd_flags & REQ_DISCARD));
		rq->cmd_flags |= REQ_SORTED;
		q->nr_sorted++;
		if (rq_mergeable(rq)) {
			elv_rqhash_add(q, rq);
			if (!q->last_merge)
				q->last_merge = rq;
		}

		/*
		 * Some ioscheds (cfq) run q->request_fn directly, so
		 * rq cannot be accessed after calling
		 * elevator_add_req_fn.
		 */
		q->elevator->type->ops.elevator_add_req_fn(q, rq);
		break;

	case ELEVATOR_INSERT_FLUSH:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		blk_insert_flush(rq);
		break;
	default:
		printk(KERN_ERR "%s: bad insertion point %d\n",
		       __func__, where);
		BUG();
	}
}
EXPORT_SYMBOL(__elv_add_request);

void elv_add_request(struct request_queue *q, struct request *rq, int where)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__elv_add_request(q, rq, where);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(elv_add_request);
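
/*
 * Ask the io scheduler for the request that follows (latter) or
 * precedes (former) @rq in its sort order; used when looking for
 * adjacent merge candidates.
 */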
struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_latter_req_fn)
		return e->type->ops.elevator_latter_req_fn(q, rq);
	return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_former_req_fn)
		return e->type->ops.elevator_former_req_fn(q, rq);
	return NULL;
}

int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_set_req_fn)
		return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
	return 0;
}

void elv_put_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_put_req_fn)
		e->type->ops.elevator_put_req_fn(rq);
}

int elv_may_queue(struct request_queue *q, int rw)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.elevator_may_queue_fn)
		return e->type->ops.elevator_may_queue_fn(q, rw);

	return ELV_MQUEUE_MAY;
}
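
/*
 * Fail every request still sitting on the dispatch queue: each one is
 * marked quiet, started, and then completed with -EIO.
 */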
void elv_abort_queue(struct request_queue *q)
{
	struct request *rq;

	blk_abort_flushes(q);

	while (!list_empty(&q->queue_head)) {
		rq = list_entry_rq(q->queue_head.next);
		rq->cmd_flags |= REQ_QUIET;
		trace_block_rq_abort(q, rq);
		/*
		 * Mark this request as started so we don't trigger
		 * any debug logic in the end I/O path.
		 */
		blk_start_request(rq);
		__blk_end_request_all(rq, -EIO);
	}
}
EXPORT_SYMBOL(elv_abort_queue);
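
/*
 * Called when the driver has completed @rq. Clears the urgent-request
 * bookkeeping if this was the dispatched urgent request, drops the
 * in-flight count and gives the io scheduler a chance to account for
 * the completion.
 */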
void elv_completed_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (rq->cmd_flags & REQ_URGENT) {
		q->notified_urgent = false;
		WARN_ON(!q->dispatched_urgent);
		q->dispatched_urgent = false;
	}
	/*
	 * request is released from the driver, io must be done
	 */
	if (blk_account_rq(rq)) {
		q->in_flight[rq_is_sync(rq)]--;
		if ((rq->cmd_flags & REQ_SORTED) &&
		    e->type->ops.elevator_completed_req_fn)
			e->type->ops.elevator_completed_req_fn(q, rq);
	}
}
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->show)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->store)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static const struct sysfs_ops elv_sysfs_ops = {
	.show	= elv_attr_show,
	.store	= elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops	= &elv_sysfs_ops,
	.release	= elevator_release,
};
int elv_register_queue(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;
	int error;

	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
	if (!error) {
		struct elv_fs_entry *attr = e->type->elevator_attrs;
		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		kobject_uevent(&e->kobj, KOBJ_ADD);
		e->registered = 1;
		if (e->type->ops.elevator_registered_fn)
			e->type->ops.elevator_registered_fn(q);
	}
	return error;
}
EXPORT_SYMBOL(elv_register_queue);

void elv_unregister_queue(struct request_queue *q)
{
	if (q) {
		struct elevator_queue *e = q->elevator;

		kobject_uevent(&e->kobj, KOBJ_REMOVE);
		kobject_del(&e->kobj);
		e->registered = 0;
	}
}
EXPORT_SYMBOL(elv_unregister_queue);
int elv_register(struct elevator_type *e)
{
	char *def = "";

	/* create icq_cache if requested */
	if (e->icq_size) {
		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
			return -EINVAL;

		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
			 "%s_io_cq", e->elevator_name);
		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
						 e->icq_align, 0, NULL);
		if (!e->icq_cache)
			return -ENOMEM;
	}

	/* register, don't allow duplicate names */
	spin_lock(&elv_list_lock);
	if (elevator_find(e->elevator_name)) {
		spin_unlock(&elv_list_lock);
		if (e->icq_cache)
			kmem_cache_destroy(e->icq_cache);
		return -EBUSY;
	}
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	/* print pretty message */
	if (!strcmp(e->elevator_name, chosen_elevator) ||
			(!*chosen_elevator &&
			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
				def = " (default)";

	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
								def);
	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
	/* unregister */
	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);

	/*
	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
	 * sure all RCU operations are complete before proceeding.
	 */
	if (e->icq_cache) {
		rcu_barrier();
		kmem_cache_destroy(e->icq_cache);
		e->icq_cache = NULL;
	}
}
EXPORT_SYMBOL_GPL(elv_unregister);
/*
 * Switch to new_e io scheduler. Be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one. This way we have a chance of going back to the
 * old one if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
	struct elevator_queue *old = q->elevator;
	bool registered = old->registered;
	int err;

	/*
	 * Turn on BYPASS and drain all requests w/ elevator private data.
	 * Block layer doesn't call into a quiesced elevator - all requests
	 * are directly put on the dispatch list without elevator data
	 * using INSERT_BACK.  All requests have SOFTBARRIER set and no
	 * merge happens either.
	 */
	blk_queue_bypass_start(q);

	/* unregister and clear all auxiliary data of the old elevator */
	if (registered)
		elv_unregister_queue(q);

	spin_lock_irq(q->queue_lock);
	ioc_clear_queue(q);
	spin_unlock_irq(q->queue_lock);

	blkg_destroy_all(q);

	/* allocate, init and register new elevator */
	err = -ENOMEM;
	q->elevator = elevator_alloc(q, new_e);
	if (!q->elevator)
		goto fail_init;

	err = new_e->ops.elevator_init_fn(q);
	if (err) {
		kobject_put(&q->elevator->kobj);
		goto fail_init;
	}

	if (registered) {
		err = elv_register_queue(q);
		if (err)
			goto fail_register;
	}

	/* done, kill the old one and finish */
	elevator_exit(old);
	blk_queue_bypass_end(q);

	blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);

	return 0;

fail_register:
	elevator_exit(q->elevator);
fail_init:
	/* switch failed, restore and re-register old elevator */
	q->elevator = old;
	elv_register_queue(q);
	blk_queue_bypass_end(q);

	return err;
}
/*
 * Switch this queue to the given IO scheduler.
 */
static int __elevator_change(struct request_queue *q, const char *name)
{
	char elevator_name[ELV_NAME_MAX];
	struct elevator_type *e;

	if (!q->elevator)
		return -ENXIO;

	strlcpy(elevator_name, name, sizeof(elevator_name));
	e = elevator_get(strstrip(elevator_name));
	if (!e) {
		printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
		return -EINVAL;
	}

	if (!strcmp(elevator_name, q->elevator->type->elevator_name)) {
		elevator_put(e);
		return 0;
	}

	return elevator_switch(q, e);
}

int elevator_change(struct request_queue *q, const char *name)
{
	int ret;

	/* Protect q->elevator from elevator_init() */
	mutex_lock(&q->sysfs_lock);
	ret = __elevator_change(q, name);
	mutex_unlock(&q->sysfs_lock);

	return ret;
}
EXPORT_SYMBOL(elevator_change);

ssize_t elv_iosched_store(struct request_queue *q, const char *name,
			  size_t count)
{
	int ret;

	if (!q->elevator)
		return count;

	ret = __elevator_change(q, name);
	if (!ret)
		return count;

	printk(KERN_ERR "elevator: switch to %s failed\n", name);
	return ret;
}
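
/*
 * Show the available io schedulers for the queue's sysfs "scheduler"
 * attribute, with the one currently attached to @q in brackets.
 */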
ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
	struct elevator_queue *e = q->elevator;
	struct elevator_type *elv;
	struct elevator_type *__e;
	int len = 0;

	if (!q->elevator || !blk_queue_stackable(q))
		return sprintf(name, "none\n");

	elv = e->type;

	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (!strcmp(elv->elevator_name, __e->elevator_name))
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
		else
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	len += sprintf(len+name, "\n");
	return len;
}
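
/*
 * Generic former/latter request lookups for io schedulers that keep
 * their requests sorted in an elv_rb tree: simply walk to the previous
 * or next node of @rq.
 */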
struct request *elv_rb_former_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);