sio-iosched.c

/*
 * Simple IO scheduler
 * Based on Noop, Deadline and V(R) IO schedulers.
 *
 * Copyright (C) 2012 Miguel Boton <mboton@gmail.com>
 *
 * This algorithm does not do any kind of sorting, as it is aimed at
 * random-access devices, but it does some basic merging. We try to
 * keep the overhead to a minimum to achieve low latency.
 *
 * Asynchronous and synchronous requests are not treated separately, but
 * we rely on deadlines to ensure fairness.
 */

#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/version.h>
#include <linux/slab.h>

enum { ASYNC, SYNC };

/* Tunables */
static const int sync_read_expire = HZ / 2;	/* max time before a sync read is submitted. */
static const int sync_write_expire = 2 * HZ;	/* max time before a sync write is submitted. */
static const int async_read_expire = 4 * HZ;	/* ditto for async, these limits are SOFT! */
static const int async_write_expire = 16 * HZ;	/* ditto for async, these limits are SOFT! */
static const int writes_starved = 2;		/* max times reads can starve a write */
static const int fifo_batch = 8;		/* # of sequential requests treated as one
						   by the above parameters. For throughput. */
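
/*
 * Note on units: the expire values above are in jiffies, and HZ is the
 * number of jiffies per second, so with the defaults a sync read
 * expires after roughly 500 ms and an async write after roughly 16 s,
 * independent of the configured HZ.
 */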

/* Elevator data */
struct sio_data {
	/* Request queues */
	struct list_head fifo_list[2][2];

	/* Attributes */
	unsigned int batched;
	unsigned int starved;

	/* Settings */
	int fifo_expire[2][2];
	int fifo_batch;
	int writes_starved;
};
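
/*
 * Indexing convention for the [2][2] arrays above: the first index is
 * the synchronicity (ASYNC = 0, SYNC = 1, from the enum above) and the
 * second is the data direction (the kernel defines READ as 0 and WRITE
 * as 1). For example, fifo_list[SYNC][READ] holds pending synchronous
 * reads in arrival order.
 */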

static void
sio_merged_requests(struct request_queue *q, struct request *rq,
		    struct request *next)
{
	/*
	 * If next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo.
	 */
	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before((unsigned long)next->fifo_time, (unsigned long)rq->fifo_time)) {
			list_move(&rq->queuelist, &next->queuelist);
			rq->fifo_time = next->fifo_time;
		}
	}

	/* Delete next request */
	rq_fifo_clear(next);
}

static void
sio_add_request(struct request_queue *q, struct request *rq)
{
	struct sio_data *sd = q->elevator->elevator_data;
	const int sync = rq_is_sync(rq);
	const int data_dir = rq_data_dir(rq);

	/*
	 * Add request to the proper fifo list and set its
	 * expire time.
	 */
	rq->fifo_time = jiffies + sd->fifo_expire[sync][data_dir];
	list_add_tail(&rq->queuelist, &sd->fifo_list[sync][data_dir]);
}
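
/*
 * The elevator_queue_empty_fn hook only exists on older kernels; the
 * version guard below keeps this helper (and the matching ops entry
 * further down) out of builds against kernels newer than 2.6.38, where
 * the block layer dropped that callback.
 */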

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38)
static int
sio_queue_empty(struct request_queue *q)
{
	struct sio_data *sd = q->elevator->elevator_data;

	/* Check if fifo lists are empty */
	return list_empty(&sd->fifo_list[SYNC][READ]) && list_empty(&sd->fifo_list[SYNC][WRITE]) &&
	       list_empty(&sd->fifo_list[ASYNC][READ]) && list_empty(&sd->fifo_list[ASYNC][WRITE]);
}
#endif

static struct request *
sio_expired_request(struct sio_data *sd, int sync, int data_dir)
{
	struct list_head *list = &sd->fifo_list[sync][data_dir];
	struct request *rq;

	if (list_empty(list))
		return NULL;

	/* Retrieve request */
	rq = rq_entry_fifo(list->next);

	/* Request has expired */
	if (time_after(jiffies, (unsigned long)rq->fifo_time))
		return rq;

	return NULL;
}

static struct request *
sio_choose_expired_request(struct sio_data *sd)
{
	struct request *rq;

	/*
	 * Check expired requests.
	 * Asynchronous requests have priority over synchronous.
	 * Write requests have priority over read.
	 */
	rq = sio_expired_request(sd, ASYNC, WRITE);
	if (rq)
		return rq;
	rq = sio_expired_request(sd, ASYNC, READ);
	if (rq)
		return rq;
	rq = sio_expired_request(sd, SYNC, WRITE);
	if (rq)
		return rq;
	rq = sio_expired_request(sd, SYNC, READ);
	if (rq)
		return rq;

	return NULL;
}

static struct request *
sio_choose_request(struct sio_data *sd, int data_dir)
{
	struct list_head *sync = sd->fifo_list[SYNC];
	struct list_head *async = sd->fifo_list[ASYNC];

	/*
	 * Retrieve request from available fifo list.
	 * Synchronous requests have priority over asynchronous.
	 * Read requests have priority over write.
	 */
	if (!list_empty(&sync[data_dir]))
		return rq_entry_fifo(sync[data_dir].next);
	if (!list_empty(&async[data_dir]))
		return rq_entry_fifo(async[data_dir].next);
	if (!list_empty(&sync[!data_dir]))
		return rq_entry_fifo(sync[!data_dir].next);
	if (!list_empty(&async[!data_dir]))
		return rq_entry_fifo(async[!data_dir].next);

	return NULL;
}

static inline void
sio_dispatch_request(struct sio_data *sd, struct request *rq)
{
	/*
	 * Remove the request from the fifo list
	 * and dispatch it.
	 */
	rq_fifo_clear(rq);
	elv_dispatch_add_tail(rq->q, rq);

	sd->batched++;

	/* A dispatched write resets the starvation count; a dispatched
	 * read starves pending writes a little more. */
	if (rq_data_dir(rq))
		sd->starved = 0;
	else
		sd->starved++;
}
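
/*
 * Dispatch policy, in brief: requests are normally picked by
 * sio_choose_request() with read priority, but after fifo_batch
 * back-to-back dispatches an expired request is looked for first, so
 * the soft deadlines above are honoured, and once more than
 * writes_starved reads have been dispatched in a row the next pick is
 * forced to WRITE.
 */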
static int
sio_dispatch_requests(struct request_queue *q, int force)
{
	struct sio_data *sd = q->elevator->elevator_data;
	struct request *rq = NULL;
	int data_dir = READ;

	/*
	 * Retrieve any expired request after a batch of
	 * sequential requests.
	 */
	if (sd->batched > sd->fifo_batch) {
		sd->batched = 0;
		rq = sio_choose_expired_request(sd);
	}

	/* Retrieve request */
	if (!rq) {
		if (sd->starved > sd->writes_starved)
			data_dir = WRITE;

		rq = sio_choose_request(sd, data_dir);
		if (!rq)
			return 0;
	}

	/* Dispatch request */
	sio_dispatch_request(sd, rq);

	return 1;
}

static struct request *
sio_former_request(struct request_queue *q, struct request *rq)
{
	struct sio_data *sd = q->elevator->elevator_data;
	const int sync = rq_is_sync(rq);
	const int data_dir = rq_data_dir(rq);

	if (rq->queuelist.prev == &sd->fifo_list[sync][data_dir])
		return NULL;

	/* Return former request */
	return list_entry(rq->queuelist.prev, struct request, queuelist);
}

static struct request *
sio_latter_request(struct request_queue *q, struct request *rq)
{
	struct sio_data *sd = q->elevator->elevator_data;
	const int sync = rq_is_sync(rq);
	const int data_dir = rq_data_dir(rq);

	if (rq->queuelist.next == &sd->fifo_list[sync][data_dir])
		return NULL;

	/* Return latter request */
	return list_entry(rq->queuelist.next, struct request, queuelist);
}
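
/*
 * The two helpers above back the elevator core's former/latter request
 * lookups, which the block layer uses when probing for adjacent
 * requests as merge candidates; both return NULL when rq is at the
 * edge of its fifo list.
 */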

static int sio_init_queue(struct request_queue *q, struct elevator_type *e)
{
	struct sio_data *sd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	/* Allocate structure */
	sd = kmalloc_node(sizeof(*sd), GFP_KERNEL, q->node);
	if (!sd) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}
	eq->elevator_data = sd;

	spin_lock_irq(q->queue_lock);
	q->elevator = eq;
	spin_unlock_irq(q->queue_lock);

	/* Initialize fifo lists */
	INIT_LIST_HEAD(&sd->fifo_list[SYNC][READ]);
	INIT_LIST_HEAD(&sd->fifo_list[SYNC][WRITE]);
	INIT_LIST_HEAD(&sd->fifo_list[ASYNC][READ]);
	INIT_LIST_HEAD(&sd->fifo_list[ASYNC][WRITE]);

	/* Initialize data (kmalloc_node() does not zero the allocation,
	 * so the starvation fields must be set explicitly here) */
	sd->batched = 0;
	sd->starved = 0;
	sd->fifo_expire[SYNC][READ] = sync_read_expire;
	sd->fifo_expire[SYNC][WRITE] = sync_write_expire;
	sd->fifo_expire[ASYNC][READ] = async_read_expire;
	sd->fifo_expire[ASYNC][WRITE] = async_write_expire;
	sd->fifo_batch = fifo_batch;
	sd->writes_starved = writes_starved;

	return 0;
}

static void
sio_exit_queue(struct elevator_queue *e)
{
	struct sio_data *sd = e->elevator_data;

	BUG_ON(!list_empty(&sd->fifo_list[SYNC][READ]));
	BUG_ON(!list_empty(&sd->fifo_list[SYNC][WRITE]));
	BUG_ON(!list_empty(&sd->fifo_list[ASYNC][READ]));
	BUG_ON(!list_empty(&sd->fifo_list[ASYNC][WRITE]));

	/* Free structure */
	kfree(sd);
}

/*
 * sysfs code
 */
static ssize_t
sio_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static ssize_t
sio_var_store(int *var, const char *page, size_t count)
{
	char *p = (char *) page;

	*var = simple_strtol(p, &p, 10);

	return count;
}
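
/*
 * Note: simple_strtol() is considered obsolete in current kernels; a
 * port to a newer tree would presumably switch this to kstrtoint(),
 * which also reports parse errors instead of silently yielding 0.
 */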

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
	struct sio_data *sd = e->elevator_data; \
	int __data = __VAR; \
	if (__CONV) \
		__data = jiffies_to_msecs(__data); \
	return sio_var_show(__data, (page)); \
}
SHOW_FUNCTION(sio_sync_read_expire_show, sd->fifo_expire[SYNC][READ], 1);
SHOW_FUNCTION(sio_sync_write_expire_show, sd->fifo_expire[SYNC][WRITE], 1);
SHOW_FUNCTION(sio_async_read_expire_show, sd->fifo_expire[ASYNC][READ], 1);
SHOW_FUNCTION(sio_async_write_expire_show, sd->fifo_expire[ASYNC][WRITE], 1);
SHOW_FUNCTION(sio_fifo_batch_show, sd->fifo_batch, 0);
SHOW_FUNCTION(sio_writes_starved_show, sd->writes_starved, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
	struct sio_data *sd = e->elevator_data; \
	int __data; \
	int ret = sio_var_store(&__data, (page), count); \
	if (__data < (MIN)) \
		__data = (MIN); \
	else if (__data > (MAX)) \
		__data = (MAX); \
	if (__CONV) \
		*(__PTR) = msecs_to_jiffies(__data); \
	else \
		*(__PTR) = __data; \
	return ret; \
}
STORE_FUNCTION(sio_sync_read_expire_store, &sd->fifo_expire[SYNC][READ], 0, INT_MAX, 1);
STORE_FUNCTION(sio_sync_write_expire_store, &sd->fifo_expire[SYNC][WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(sio_async_read_expire_store, &sd->fifo_expire[ASYNC][READ], 0, INT_MAX, 1);
STORE_FUNCTION(sio_async_write_expire_store, &sd->fifo_expire[ASYNC][WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(sio_fifo_batch_store, &sd->fifo_batch, 0, INT_MAX, 0);
STORE_FUNCTION(sio_writes_starved_store, &sd->writes_starved, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, sio_##name##_show, \
	       sio_##name##_store)

static struct elv_fs_entry sio_attrs[] = {
	DD_ATTR(sync_read_expire),
	DD_ATTR(sync_write_expire),
	DD_ATTR(async_read_expire),
	DD_ATTR(async_write_expire),
	DD_ATTR(fifo_batch),
	DD_ATTR(writes_starved),
	__ATTR_NULL
};
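
/*
 * With the scheduler active, these attributes appear under the queue's
 * iosched directory, e.g. (assuming the device is sda):
 *
 *	# cat /sys/block/sda/queue/iosched/sync_read_expire
 *	500
 *	# echo 250 > /sys/block/sda/queue/iosched/sync_read_expire
 *
 * The expire values are shown and stored in milliseconds thanks to the
 * __CONV conversions above.
 */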

static struct elevator_type iosched_sio = {
	.ops.sq = {
		.elevator_merge_req_fn		= sio_merged_requests,
		.elevator_dispatch_fn		= sio_dispatch_requests,
		.elevator_add_req_fn		= sio_add_request,
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38)
		.elevator_queue_empty_fn	= sio_queue_empty,
#endif
		.elevator_former_req_fn		= sio_former_request,
		.elevator_latter_req_fn		= sio_latter_request,
		.elevator_init_fn		= sio_init_queue,
		.elevator_exit_fn		= sio_exit_queue,
	},

	.elevator_attrs = sio_attrs,
	.elevator_name = "sio",
	.elevator_owner = THIS_MODULE,
};
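
/*
 * Once the module is loaded, the scheduler can be selected per device
 * via sysfs (assuming the device is sda):
 *
 *	# echo sio > /sys/block/sda/queue/scheduler
 *
 * or made the system default with the elevator=sio kernel boot
 * parameter on kernels of this era.
 */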

static int __init sio_init(void)
{
	/* Register elevator */
	elv_register(&iosched_sio);

	return 0;
}

static void __exit sio_exit(void)
{
	/* Unregister elevator */
	elv_unregister(&iosched_sio);
}

module_init(sio_init);
module_exit(sio_exit);

MODULE_AUTHOR("Miguel Boton");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Simple IO scheduler");
MODULE_VERSION("0.2");