/*
 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
 * fairer distribution of tags between multiple submitters when a shared tag map
 * is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
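
/*
 * Return true if at least one tag in the regular bitmap is free. A NULL
 * @tags means the queue does not use tag allocation, so there is never
 * anything to wait for.
 */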
bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
{
        if (!tags)
                return true;

        return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
        if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
            !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                atomic_inc(&hctx->tags->active_queues);

        return true;
}

/*
 * Wake up all waiters potentially sleeping on tags.
 */
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{
        sbitmap_queue_wake_all(&tags->bitmap_tags);
        if (include_reserve)
                sbitmap_queue_wake_all(&tags->breserved_tags);
}

/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
        struct blk_mq_tags *tags = hctx->tags;

        if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                return;

        atomic_dec(&tags->active_queues);

        blk_mq_tag_wakeup_all(tags, false);
}

/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
                                  struct sbitmap_queue *bt)
{
        unsigned int depth, users;

        if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
                return true;
        if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                return true;

        /*
         * Don't try dividing an ant
         */
        if (bt->sb.depth == 1)
                return true;

        users = atomic_read(&hctx->tags->active_queues);
        if (!users)
                return true;

        /*
         * Allow at least some tags
         */
        depth = max((bt->sb.depth + users - 1) / users, 4U);
        return atomic_read(&hctx->nr_active) < depth;
}
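
/*
 * Grab a free tag from @bt without sleeping. Returns -1 if the fair-share
 * limit enforced by hctx_may_queue() has been reached or no bit is free.
 */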
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt)
{
        if (!hctx_may_queue(hctx, bt))
                return -1;
        return __sbitmap_queue_get(bt);
}
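
/*
 * Slow path tag allocation. If no tag is immediately available and the
 * caller allows sleeping, wait on the bitmap's waitqueue, kicking the
 * hardware queue first so pending IO can complete and return tags. After
 * sleeping, the task may have migrated CPUs, so the software and hardware
 * queue (and possibly the bitmap) are looked up again before retrying.
 */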
static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt,
                  struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags)
{
        struct sbq_wait_state *ws;
        DEFINE_WAIT(wait);
        int tag;

        tag = __bt_get(hctx, bt);
        if (tag != -1)
                return tag;

        if (data->flags & BLK_MQ_REQ_NOWAIT)
                return -1;

        ws = bt_wait_ptr(bt, hctx);
        do {
                prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);

                tag = __bt_get(hctx, bt);
                if (tag != -1)
                        break;

                /*
                 * We're out of tags on this hardware queue, kick any
                 * pending IO submits before going to sleep waiting for
                 * some to complete. Note that hctx can be NULL here for
                 * reserved tag allocation.
                 */
                if (hctx)
                        blk_mq_run_hw_queue(hctx, false);

                /*
                 * Retry tag allocation after running the hardware queue,
                 * as running the queue may also have found completions.
                 */
                tag = __bt_get(hctx, bt);
                if (tag != -1)
                        break;

                blk_mq_put_ctx(data->ctx);

                io_schedule();

                data->ctx = blk_mq_get_ctx(data->q);
                data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
                if (data->flags & BLK_MQ_REQ_RESERVED) {
                        bt = &data->hctx->tags->breserved_tags;
                } else {
                        hctx = data->hctx;
                        bt = &hctx->tags->bitmap_tags;
                }

                finish_wait(&ws->wait, &wait);
                ws = bt_wait_ptr(bt, hctx);
        } while (1);

        finish_wait(&ws->wait, &wait);
        return tag;
}
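
/*
 * Allocate a regular (non-reserved) tag. The value returned to the caller
 * is offset by nr_reserved_tags so that reserved and regular tags share a
 * single tag space.
 */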
static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
        int tag;

        tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
                     data->hctx->tags);
        if (tag >= 0)
                return tag + data->hctx->tags->nr_reserved_tags;

        return BLK_MQ_TAG_FAIL;
}

static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
{
        int tag;

        if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
                WARN_ON_ONCE(1);
                return BLK_MQ_TAG_FAIL;
        }

        tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL,
                     data->hctx->tags);
        if (tag < 0)
                return BLK_MQ_TAG_FAIL;

        return tag;
}
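
/*
 * Main entry point for tag allocation: pick the reserved or the regular
 * pool based on the allocation flags.
 */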
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
        if (data->flags & BLK_MQ_REQ_RESERVED)
                return __blk_mq_get_reserved_tag(data);
        return __blk_mq_get_tag(data);
}
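
/*
 * Free a tag, translating the queue-wide tag number back to an index in
 * the regular or reserved bitmap it was allocated from.
 */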
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
                    unsigned int tag)
{
        struct blk_mq_tags *tags = hctx->tags;

        if (tag >= tags->nr_reserved_tags) {
                const int real_tag = tag - tags->nr_reserved_tags;

                BUG_ON(real_tag >= tags->nr_tags);
                sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
        } else {
                BUG_ON(tag >= tags->nr_reserved_tags);
                sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
        }
}
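
/*
 * Per-bit callback state and helper for iterating busy requests on one
 * hardware queue. The bit number is translated back to a tag, and the
 * request is only passed to the callback if it belongs to the request
 * queue that owns @hctx (the tag set may be shared between queues).
 */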
struct bt_iter_data {
        struct blk_mq_hw_ctx *hctx;
        busy_iter_fn *fn;
        void *data;
        bool reserved;
};

static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
        struct bt_iter_data *iter_data = data;
        struct blk_mq_hw_ctx *hctx = iter_data->hctx;
        struct blk_mq_tags *tags = hctx->tags;
        bool reserved = iter_data->reserved;
        struct request *rq;

        if (!reserved)
                bitnr += tags->nr_reserved_tags;
        rq = tags->rqs[bitnr];

        if (rq->q == hctx->queue)
                iter_data->fn(hctx, rq, iter_data->data, reserved);
        return true;
}

static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
                        busy_iter_fn *fn, void *data, bool reserved)
{
        struct bt_iter_data iter_data = {
                .hctx = hctx,
                .fn = fn,
                .data = data,
                .reserved = reserved,
        };

        sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
}
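
/*
 * Same as bt_iter(), but iterating a struct blk_mq_tags directly without
 * a hardware queue context; used for tag-set wide iteration.
 */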
struct bt_tags_iter_data {
        struct blk_mq_tags *tags;
        busy_tag_iter_fn *fn;
        void *data;
        bool reserved;
};

static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
        struct bt_tags_iter_data *iter_data = data;
        struct blk_mq_tags *tags = iter_data->tags;
        bool reserved = iter_data->reserved;
        struct request *rq;

        if (!reserved)
                bitnr += tags->nr_reserved_tags;
        rq = tags->rqs[bitnr];

        iter_data->fn(rq, iter_data->data, reserved);
        return true;
}

static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
                             busy_tag_iter_fn *fn, void *data, bool reserved)
{
        struct bt_tags_iter_data iter_data = {
                .tags = tags,
                .fn = fn,
                .data = data,
                .reserved = reserved,
        };

        if (tags->rqs)
                sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}

static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
                                     busy_tag_iter_fn *fn, void *priv)
{
        if (tags->nr_reserved_tags)
                bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
        bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
}
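
/*
 * Iterate over all allocated tags in every hardware queue of a tag set,
 * calling @fn for each busy request. Typically used by drivers that need
 * to walk or abort their outstanding requests.
 */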
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
                             busy_tag_iter_fn *fn, void *priv)
{
        int i;

        for (i = 0; i < tagset->nr_hw_queues; i++) {
                if (tagset->tags && tagset->tags[i])
                        blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
        }
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
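
/*
 * Re-initialize every allocated request in the tag set through the
 * driver's ->reinit_request() callback. A missing callback is not an
 * error; iteration simply does nothing and 0 is returned.
 */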
int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
{
        int i, j, ret = 0;

        if (!set->ops->reinit_request)
                goto out;

        for (i = 0; i < set->nr_hw_queues; i++) {
                struct blk_mq_tags *tags = set->tags[i];

                for (j = 0; j < tags->nr_tags; j++) {
                        if (!tags->rqs[j])
                                continue;

                        ret = set->ops->reinit_request(set->driver_data,
                                                       tags->rqs[j]);
                        if (ret)
                                goto out;
                }
        }

out:
        return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset);
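
/*
 * Iterate over the busy requests of a single request queue, walking the
 * reserved and regular bitmaps of every mapped hardware queue.
 */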
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
                                void *priv)
{
        struct blk_mq_hw_ctx *hctx;
        int i;

        queue_for_each_hw_ctx(q, hctx, i) {
                struct blk_mq_tags *tags = hctx->tags;

                /*
                 * If no software queues are currently mapped to this
                 * hardware queue, there's nothing to check.
                 */
                if (!blk_mq_hw_queue_mapped(hctx))
                        continue;

                if (tags->nr_reserved_tags)
                        bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
                bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
        }
}

static unsigned int bt_unused_tags(const struct sbitmap_queue *bt)
{
        return bt->sb.depth - sbitmap_weight(&bt->sb);
}

static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
                    bool round_robin, int node)
{
        return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
                                       node);
}

static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
                                                   int node, int alloc_policy)
{
        unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
        bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;

        if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
                goto free_tags;
        if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, round_robin,
                     node))
                goto free_bitmap_tags;

        return tags;

free_bitmap_tags:
        sbitmap_queue_free(&tags->bitmap_tags);
free_tags:
        kfree(tags);
        return NULL;
}
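
/*
 * Allocate and initialize a tag map with @total_tags entries, of which
 * @reserved_tags are set aside in a separate reserved bitmap. Returns
 * NULL on allocation failure or if the requested depth is too large.
 */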
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
                                     unsigned int reserved_tags,
                                     int node, int alloc_policy)
{
        struct blk_mq_tags *tags;

        if (total_tags > BLK_MQ_TAG_MAX) {
                pr_err("blk-mq: tag depth too large\n");
                return NULL;
        }

        tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
        if (!tags)
                return NULL;

        tags->nr_tags = total_tags;
        tags->nr_reserved_tags = reserved_tags;

        return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}

void blk_mq_free_tags(struct blk_mq_tags *tags)
{
        sbitmap_queue_free(&tags->bitmap_tags);
        sbitmap_queue_free(&tags->breserved_tags);
        kfree(tags);
}
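
/*
 * Resize the regular tag bitmap so the effective queue depth becomes
 * @tdepth (which counts the reserved tags as well); growing beyond the
 * originally allocated number of tags is rejected.
 */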
int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
{
        tdepth -= tags->nr_reserved_tags;
        if (tdepth > tags->nr_tags)
                return -EINVAL;

        /*
         * Don't need (or can't) update reserved tags here, they remain
         * static and should never need resizing.
         */
        sbitmap_queue_resize(&tags->bitmap_tags, tdepth);

        blk_mq_tag_wakeup_all(tags, false);
        return 0;
}

/**
 * blk_mq_unique_tag() - return a tag that is unique queue-wide
 * @rq: request for which to compute a unique tag
 *
 * The tag field in struct request is unique per hardware queue but not over
 * all hardware queues. Hence this function, which returns a tag with the
 * hardware context index in the upper bits and the per hardware queue tag in
 * the lower bits.
 *
 * Note: When called for a request that is queued on a non-multiqueue request
 * queue, the hardware context index is set to zero.
 */
u32 blk_mq_unique_tag(struct request *rq)
{
        struct request_queue *q = rq->q;
        struct blk_mq_hw_ctx *hctx;
        int hwq = 0;

        if (q->mq_ops) {
                hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
                hwq = hctx->queue_num;
        }

        return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
                (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);
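
/*
 * Format a human-readable summary of the tag map (depth, reserved count,
 * bits per word, free counts and active queues) into @page for sysfs.
 */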
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{
        char *orig_page = page;
        unsigned int free, res;

        if (!tags)
                return 0;

        page += sprintf(page, "nr_tags=%u, reserved_tags=%u, "
                        "bits_per_word=%u\n",
                        tags->nr_tags, tags->nr_reserved_tags,
                        1U << tags->bitmap_tags.sb.shift);

        free = bt_unused_tags(&tags->bitmap_tags);
        res = bt_unused_tags(&tags->breserved_tags);

        page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
        page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));

        return page - orig_page;
}