dm-cache-policy-smq.c

  1. /*
  2. * Copyright (C) 2015 Red Hat. All rights reserved.
  3. *
  4. * This file is released under the GPL.
  5. */
  6. #include "dm-cache-policy.h"
  7. #include "dm-cache-policy-internal.h"
  8. #include "dm.h"
  9. #include <linux/hash.h>
  10. #include <linux/jiffies.h>
  11. #include <linux/module.h>
  12. #include <linux/mutex.h>
  13. #include <linux/vmalloc.h>
  14. #include <linux/math64.h>
  15. #define DM_MSG_PREFIX "cache-policy-smq"
  16. /*----------------------------------------------------------------*/
  17. /*
  18. * Safe division functions that return zero on divide by zero.
  19. */
  20. static unsigned safe_div(unsigned n, unsigned d)
  21. {
  22. return d ? n / d : 0u;
  23. }
  24. static unsigned safe_mod(unsigned n, unsigned d)
  25. {
  26. return d ? n % d : 0u;
  27. }
  28. /*----------------------------------------------------------------*/
  29. struct entry {
  30. unsigned hash_next:28;
  31. unsigned prev:28;
  32. unsigned next:28;
  33. unsigned level:7;
  34. bool dirty:1;
  35. bool allocated:1;
  36. bool sentinel:1;
  37. dm_oblock_t oblock;
  38. };
  39. /*----------------------------------------------------------------*/
  40. #define INDEXER_NULL ((1u << 28u) - 1u)
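/*
 * Entries refer to each other by 28-bit indices into the entry_space
 * below rather than by pointer, with INDEXER_NULL acting as the null
 * link; this keeps struct entry small. The level field records which
 * queue level the entry currently occupies, and sentinel marks the
 * per-level marker entries used by the periodic writeback/demote
 * sweeps.
 */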
  41. /*
  42. * An entry_space manages a set of entries that we use for the queues.
  43. * The clean and dirty queues share entries, so this object is separate
  44. * from the queue itself.
  45. */
  46. struct entry_space {
  47. struct entry *begin;
  48. struct entry *end;
  49. };
  50. static int space_init(struct entry_space *es, unsigned nr_entries)
  51. {
  52. if (!nr_entries) {
  53. es->begin = es->end = NULL;
  54. return 0;
  55. }
  56. es->begin = vzalloc(sizeof(struct entry) * nr_entries);
  57. if (!es->begin)
  58. return -ENOMEM;
  59. es->end = es->begin + nr_entries;
  60. return 0;
  61. }
  62. static void space_exit(struct entry_space *es)
  63. {
  64. vfree(es->begin);
  65. }
  66. static struct entry *__get_entry(struct entry_space *es, unsigned block)
  67. {
  68. struct entry *e;
  69. e = es->begin + block;
  70. BUG_ON(e >= es->end);
  71. return e;
  72. }
  73. static unsigned to_index(struct entry_space *es, struct entry *e)
  74. {
  75. BUG_ON(e < es->begin || e >= es->end);
  76. return e - es->begin;
  77. }
  78. static struct entry *to_entry(struct entry_space *es, unsigned block)
  79. {
  80. if (block == INDEXER_NULL)
  81. return NULL;
  82. return __get_entry(es, block);
  83. }
  84. /*----------------------------------------------------------------*/
  85. struct ilist {
  86. unsigned nr_elts; /* excluding sentinel entries */
  87. unsigned head, tail;
  88. };
  89. static void l_init(struct ilist *l)
  90. {
  91. l->nr_elts = 0;
  92. l->head = l->tail = INDEXER_NULL;
  93. }
  94. static struct entry *l_head(struct entry_space *es, struct ilist *l)
  95. {
  96. return to_entry(es, l->head);
  97. }
  98. static struct entry *l_tail(struct entry_space *es, struct ilist *l)
  99. {
  100. return to_entry(es, l->tail);
  101. }
  102. static struct entry *l_next(struct entry_space *es, struct entry *e)
  103. {
  104. return to_entry(es, e->next);
  105. }
  106. static struct entry *l_prev(struct entry_space *es, struct entry *e)
  107. {
  108. return to_entry(es, e->prev);
  109. }
  110. static bool l_empty(struct ilist *l)
  111. {
  112. return l->head == INDEXER_NULL;
  113. }
  114. static void l_add_head(struct entry_space *es, struct ilist *l, struct entry *e)
  115. {
  116. struct entry *head = l_head(es, l);
  117. e->next = l->head;
  118. e->prev = INDEXER_NULL;
  119. if (head)
  120. head->prev = l->head = to_index(es, e);
  121. else
  122. l->head = l->tail = to_index(es, e);
  123. if (!e->sentinel)
  124. l->nr_elts++;
  125. }
  126. static void l_add_tail(struct entry_space *es, struct ilist *l, struct entry *e)
  127. {
  128. struct entry *tail = l_tail(es, l);
  129. e->next = INDEXER_NULL;
  130. e->prev = l->tail;
  131. if (tail)
  132. tail->next = l->tail = to_index(es, e);
  133. else
  134. l->head = l->tail = to_index(es, e);
  135. if (!e->sentinel)
  136. l->nr_elts++;
  137. }
  138. static void l_add_before(struct entry_space *es, struct ilist *l,
  139. struct entry *old, struct entry *e)
  140. {
  141. struct entry *prev = l_prev(es, old);
  142. if (!prev)
  143. l_add_head(es, l, e);
  144. else {
  145. e->prev = old->prev;
  146. e->next = to_index(es, old);
  147. prev->next = old->prev = to_index(es, e);
  148. if (!e->sentinel)
  149. l->nr_elts++;
  150. }
  151. }
  152. static void l_del(struct entry_space *es, struct ilist *l, struct entry *e)
  153. {
  154. struct entry *prev = l_prev(es, e);
  155. struct entry *next = l_next(es, e);
  156. if (prev)
  157. prev->next = e->next;
  158. else
  159. l->head = e->next;
  160. if (next)
  161. next->prev = e->prev;
  162. else
  163. l->tail = e->prev;
  164. if (!e->sentinel)
  165. l->nr_elts--;
  166. }
  167. static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l)
  168. {
  169. struct entry *e;
  170. for (e = l_tail(es, l); e; e = l_prev(es, e))
  171. if (!e->sentinel) {
  172. l_del(es, l, e);
  173. return e;
  174. }
  175. return NULL;
  176. }
  177. /*----------------------------------------------------------------*/
  178. /*
  179. * The stochastic-multi-queue is a set of LRU lists stacked into levels.
  180. * Entries are moved up a level when they are used, which loosely orders the
  181. * most frequently accessed entries into the top levels and the least accessed
  182. * into the bottom. This structure performs *much* better than a single LRU list.
  183. */
  184. #define MAX_LEVELS 64u
  185. struct queue {
  186. struct entry_space *es;
  187. unsigned nr_elts;
  188. unsigned nr_levels;
  189. struct ilist qs[MAX_LEVELS];
  190. /*
  191. * We maintain a count of the number of entries we would like in each
  192. * level.
  193. */
  194. unsigned last_target_nr_elts;
  195. unsigned nr_top_levels;
  196. unsigned nr_in_top_levels;
  197. unsigned target_count[MAX_LEVELS];
  198. };
  199. static void q_init(struct queue *q, struct entry_space *es, unsigned nr_levels)
  200. {
  201. unsigned i;
  202. q->es = es;
  203. q->nr_elts = 0;
  204. q->nr_levels = nr_levels;
  205. for (i = 0; i < q->nr_levels; i++) {
  206. l_init(q->qs + i);
  207. q->target_count[i] = 0u;
  208. }
  209. q->last_target_nr_elts = 0u;
  210. q->nr_top_levels = 0u;
  211. q->nr_in_top_levels = 0u;
  212. }
  213. static unsigned q_size(struct queue *q)
  214. {
  215. return q->nr_elts;
  216. }
  217. /*
  218. * Insert an entry to the back of the given level.
  219. */
  220. static void q_push(struct queue *q, struct entry *e)
  221. {
  222. if (!e->sentinel)
  223. q->nr_elts++;
  224. l_add_tail(q->es, q->qs + e->level, e);
  225. }
  226. static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
  227. {
  228. if (!e->sentinel)
  229. q->nr_elts++;
  230. l_add_before(q->es, q->qs + e->level, old, e);
  231. }
  232. static void q_del(struct queue *q, struct entry *e)
  233. {
  234. l_del(q->es, q->qs + e->level, e);
  235. if (!e->sentinel)
  236. q->nr_elts--;
  237. }
  238. /*
  239. * Return the oldest entry of the lowest populated level.
  240. */
  241. static struct entry *q_peek(struct queue *q, unsigned max_level, bool can_cross_sentinel)
  242. {
  243. unsigned level;
  244. struct entry *e;
  245. max_level = min(max_level, q->nr_levels);
  246. for (level = 0; level < max_level; level++)
  247. for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
  248. if (e->sentinel) {
  249. if (can_cross_sentinel)
  250. continue;
  251. else
  252. break;
  253. }
  254. return e;
  255. }
  256. return NULL;
  257. }
  258. static struct entry *q_pop(struct queue *q)
  259. {
  260. struct entry *e = q_peek(q, q->nr_levels, true);
  261. if (e)
  262. q_del(q, e);
  263. return e;
  264. }
  265. /*
  266. * Pops an entry from a level that is not past a sentinel.
  267. */
  268. static struct entry *q_pop_old(struct queue *q, unsigned max_level)
  269. {
  270. struct entry *e = q_peek(q, max_level, false);
  271. if (e)
  272. q_del(q, e);
  273. return e;
  274. }
  275. /*
  276. * This function assumes there is a non-sentinel entry to pop. It's only
  277. * used by redistribute, so we know this is true. It also doesn't adjust
  278. * the q->nr_elts count.
  279. */
  280. static struct entry *__redist_pop_from(struct queue *q, unsigned level)
  281. {
  282. struct entry *e;
  283. for (; level < q->nr_levels; level++)
  284. for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e))
  285. if (!e->sentinel) {
  286. l_del(q->es, q->qs + e->level, e);
  287. return e;
  288. }
  289. return NULL;
  290. }
  291. static void q_set_targets_subrange_(struct queue *q, unsigned nr_elts, unsigned lbegin, unsigned lend)
  292. {
  293. unsigned level, nr_levels, entries_per_level, remainder;
  294. BUG_ON(lbegin > lend);
  295. BUG_ON(lend > q->nr_levels);
  296. nr_levels = lend - lbegin;
  297. entries_per_level = safe_div(nr_elts, nr_levels);
  298. remainder = safe_mod(nr_elts, nr_levels);
  299. for (level = lbegin; level < lend; level++)
  300. q->target_count[level] =
  301. (level < (lbegin + remainder)) ? entries_per_level + 1u : entries_per_level;
  302. }
  303. /*
  304. * Typically we have fewer elements in the top few levels which allows us
  305. * to adjust the promote threshold nicely.
  306. */
  307. static void q_set_targets(struct queue *q)
  308. {
  309. if (q->last_target_nr_elts == q->nr_elts)
  310. return;
  311. q->last_target_nr_elts = q->nr_elts;
  312. if (q->nr_top_levels > q->nr_levels)
  313. q_set_targets_subrange_(q, q->nr_elts, 0, q->nr_levels);
  314. else {
  315. q_set_targets_subrange_(q, q->nr_in_top_levels,
  316. q->nr_levels - q->nr_top_levels, q->nr_levels);
  317. if (q->nr_in_top_levels < q->nr_elts)
  318. q_set_targets_subrange_(q, q->nr_elts - q->nr_in_top_levels,
  319. 0, q->nr_levels - q->nr_top_levels);
  320. else
  321. q_set_targets_subrange_(q, 0, 0, q->nr_levels - q->nr_top_levels);
  322. }
  323. }
  324. static void q_redistribute(struct queue *q)
  325. {
  326. unsigned target, level;
  327. struct ilist *l, *l_above;
  328. struct entry *e;
  329. q_set_targets(q);
  330. for (level = 0u; level < q->nr_levels - 1u; level++) {
  331. l = q->qs + level;
  332. target = q->target_count[level];
  333. /*
  334. * Pull down some entries from the level above.
  335. */
  336. while (l->nr_elts < target) {
  337. e = __redist_pop_from(q, level + 1u);
  338. if (!e) {
  339. /* bug in nr_elts */
  340. break;
  341. }
  342. e->level = level;
  343. l_add_tail(q->es, l, e);
  344. }
  345. /*
  346. * Push some entries up.
  347. */
  348. l_above = q->qs + level + 1u;
  349. while (l->nr_elts > target) {
  350. e = l_pop_tail(q->es, l);
  351. if (!e)
  352. /* bug in nr_elts */
  353. break;
  354. e->level = level + 1u;
  355. l_add_head(q->es, l_above, e);
  356. }
  357. }
  358. }
  359. static void q_requeue_before(struct queue *q, struct entry *dest, struct entry *e, unsigned extra_levels)
  360. {
  361. struct entry *de;
  362. unsigned new_level;
  363. q_del(q, e);
  364. if (extra_levels && (e->level < q->nr_levels - 1u)) {
  365. new_level = min(q->nr_levels - 1u, e->level + extra_levels);
  366. for (de = l_head(q->es, q->qs + new_level); de; de = l_next(q->es, de)) {
  367. if (de->sentinel)
  368. continue;
  369. q_del(q, de);
  370. de->level = e->level;
  371. if (dest)
  372. q_push_before(q, dest, de);
  373. else
  374. q_push(q, de);
  375. break;
  376. }
  377. e->level = new_level;
  378. }
  379. q_push(q, e);
  380. }
  381. static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels)
  382. {
  383. q_requeue_before(q, NULL, e, extra_levels);
  384. }
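/*
 * Note that promoting an entry by extra_levels is effectively a swap:
 * q_requeue_before() pulls the first non-sentinel entry it finds at the
 * destination level down into the promoted entry's old level (just in
 * front of dest, if one was given) before pushing the promoted entry,
 * which keeps the per-level populations roughly stable between
 * redistributions.
 */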
  385. /*----------------------------------------------------------------*/
  386. #define FP_SHIFT 8
  387. #define SIXTEENTH (1u << (FP_SHIFT - 4u))
  388. #define EIGHTH (1u << (FP_SHIFT - 3u))
  389. struct stats {
  390. unsigned hit_threshold;
  391. unsigned hits;
  392. unsigned misses;
  393. };
  394. enum performance {
  395. Q_POOR,
  396. Q_FAIR,
  397. Q_WELL
  398. };
  399. static void stats_init(struct stats *s, unsigned nr_levels)
  400. {
  401. s->hit_threshold = (nr_levels * 3u) / 4u;
  402. s->hits = 0u;
  403. s->misses = 0u;
  404. }
  405. static void stats_reset(struct stats *s)
  406. {
  407. s->hits = s->misses = 0u;
  408. }
  409. static void stats_level_accessed(struct stats *s, unsigned level)
  410. {
  411. if (level >= s->hit_threshold)
  412. s->hits++;
  413. else
  414. s->misses++;
  415. }
  416. static void stats_miss(struct stats *s)
  417. {
  418. s->misses++;
  419. }
  420. /*
  421. * There are times when we don't have any confidence in the hotspot queue,
  422. * such as when a fresh cache is created and the blocks have been spread
  423. * out across the levels, or when the I/O load changes. We detect this by
  424. * seeing how often a lookup lands in the top levels of the hotspot queue.
  425. */
  426. static enum performance stats_assess(struct stats *s)
  427. {
  428. unsigned confidence = safe_div(s->hits << FP_SHIFT, s->hits + s->misses);
  429. if (confidence < SIXTEENTH)
  430. return Q_POOR;
  431. else if (confidence < EIGHTH)
  432. return Q_FAIR;
  433. else
  434. return Q_WELL;
  435. }
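/*
 * With FP_SHIFT of 8 the confidence above is
 * (hits << 8) / (hits + misses), i.e. the fraction of accesses that
 * landed in the top levels (at or above hit_threshold) expressed in
 * 1/256ths. SIXTEENTH (16) and EIGHTH (32) therefore correspond to
 * roughly 6% and 12.5%: below 1/16 the queue is judged Q_POOR, below
 * 1/8 Q_FAIR, otherwise Q_WELL.
 */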
  436. /*----------------------------------------------------------------*/
  437. struct hash_table {
  438. struct entry_space *es;
  439. unsigned long long hash_bits;
  440. unsigned *buckets;
  441. };
  442. /*
  443. * All cache entries are stored in a chained hash table. To save space we
  444. * use indexing again, and only store indexes to the next entry.
  445. */
  446. static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_entries)
  447. {
  448. unsigned i, nr_buckets;
  449. ht->es = es;
  450. nr_buckets = roundup_pow_of_two(max(nr_entries / 4u, 16u));
  451. ht->hash_bits = __ffs(nr_buckets);
  452. ht->buckets = vmalloc(sizeof(*ht->buckets) * nr_buckets);
  453. if (!ht->buckets)
  454. return -ENOMEM;
  455. for (i = 0; i < nr_buckets; i++)
  456. ht->buckets[i] = INDEXER_NULL;
  457. return 0;
  458. }
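/*
 * This gives roughly one bucket for every four entries, via
 * roundup_pow_of_two(max(nr_entries / 4u, 16u)); hash_bits is the log2
 * of the bucket count and collisions are resolved by chaining through
 * entry->hash_next.
 */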
  459. static void h_exit(struct hash_table *ht)
  460. {
  461. vfree(ht->buckets);
  462. }
  463. static struct entry *h_head(struct hash_table *ht, unsigned bucket)
  464. {
  465. return to_entry(ht->es, ht->buckets[bucket]);
  466. }
  467. static struct entry *h_next(struct hash_table *ht, struct entry *e)
  468. {
  469. return to_entry(ht->es, e->hash_next);
  470. }
  471. static void __h_insert(struct hash_table *ht, unsigned bucket, struct entry *e)
  472. {
  473. e->hash_next = ht->buckets[bucket];
  474. ht->buckets[bucket] = to_index(ht->es, e);
  475. }
  476. static void h_insert(struct hash_table *ht, struct entry *e)
  477. {
  478. unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
  479. __h_insert(ht, h, e);
  480. }
  481. static struct entry *__h_lookup(struct hash_table *ht, unsigned h, dm_oblock_t oblock,
  482. struct entry **prev)
  483. {
  484. struct entry *e;
  485. *prev = NULL;
  486. for (e = h_head(ht, h); e; e = h_next(ht, e)) {
  487. if (e->oblock == oblock)
  488. return e;
  489. *prev = e;
  490. }
  491. return NULL;
  492. }
  493. static void __h_unlink(struct hash_table *ht, unsigned h,
  494. struct entry *e, struct entry *prev)
  495. {
  496. if (prev)
  497. prev->hash_next = e->hash_next;
  498. else
  499. ht->buckets[h] = e->hash_next;
  500. }
  501. /*
  502. * Also moves the looked-up entry to the front of its bucket.
  503. */
  504. static struct entry *h_lookup(struct hash_table *ht, dm_oblock_t oblock)
  505. {
  506. struct entry *e, *prev;
  507. unsigned h = hash_64(from_oblock(oblock), ht->hash_bits);
  508. e = __h_lookup(ht, h, oblock, &prev);
  509. if (e && prev) {
  510. /*
  511. * Move to the front because this entry is likely
  512. * to be hit again.
  513. */
  514. __h_unlink(ht, h, e, prev);
  515. __h_insert(ht, h, e);
  516. }
  517. return e;
  518. }
  519. static void h_remove(struct hash_table *ht, struct entry *e)
  520. {
  521. unsigned h = hash_64(from_oblock(e->oblock), ht->hash_bits);
  522. struct entry *prev;
  523. /*
  524. * The downside of using a singly linked list is that we have to
  525. * iterate the bucket to remove an item.
  526. */
  527. e = __h_lookup(ht, h, e->oblock, &prev);
  528. if (e)
  529. __h_unlink(ht, h, e, prev);
  530. }
  531. /*----------------------------------------------------------------*/
  532. struct entry_alloc {
  533. struct entry_space *es;
  534. unsigned begin;
  535. unsigned nr_allocated;
  536. struct ilist free;
  537. };
  538. static void init_allocator(struct entry_alloc *ea, struct entry_space *es,
  539. unsigned begin, unsigned end)
  540. {
  541. unsigned i;
  542. ea->es = es;
  543. ea->nr_allocated = 0u;
  544. ea->begin = begin;
  545. l_init(&ea->free);
  546. for (i = begin; i != end; i++)
  547. l_add_tail(ea->es, &ea->free, __get_entry(ea->es, i));
  548. }
  549. static void init_entry(struct entry *e)
  550. {
  551. /*
  552. * We can't memset because that would clear the hotspot and
  553. * sentinel bits which remain constant.
  554. */
  555. e->hash_next = INDEXER_NULL;
  556. e->next = INDEXER_NULL;
  557. e->prev = INDEXER_NULL;
  558. e->level = 0u;
  559. e->allocated = true;
  560. }
  561. static struct entry *alloc_entry(struct entry_alloc *ea)
  562. {
  563. struct entry *e;
  564. if (l_empty(&ea->free))
  565. return NULL;
  566. e = l_pop_tail(ea->es, &ea->free);
  567. init_entry(e);
  568. ea->nr_allocated++;
  569. return e;
  570. }
  571. /*
  572. * This assumes the cblock hasn't already been allocated.
  573. */
  574. static struct entry *alloc_particular_entry(struct entry_alloc *ea, unsigned i)
  575. {
  576. struct entry *e = __get_entry(ea->es, ea->begin + i);
  577. BUG_ON(e->allocated);
  578. l_del(ea->es, &ea->free, e);
  579. init_entry(e);
  580. ea->nr_allocated++;
  581. return e;
  582. }
  583. static void free_entry(struct entry_alloc *ea, struct entry *e)
  584. {
  585. BUG_ON(!ea->nr_allocated);
  586. BUG_ON(!e->allocated);
  587. ea->nr_allocated--;
  588. e->allocated = false;
  589. l_add_tail(ea->es, &ea->free, e);
  590. }
  591. static bool allocator_empty(struct entry_alloc *ea)
  592. {
  593. return l_empty(&ea->free);
  594. }
  595. static unsigned get_index(struct entry_alloc *ea, struct entry *e)
  596. {
  597. return to_index(ea->es, e) - ea->begin;
  598. }
  599. static struct entry *get_entry(struct entry_alloc *ea, unsigned index)
  600. {
  601. return __get_entry(ea->es, ea->begin + index);
  602. }
  603. /*----------------------------------------------------------------*/
  604. #define NR_HOTSPOT_LEVELS 64u
  605. #define NR_CACHE_LEVELS 64u
  606. #define WRITEBACK_PERIOD (10 * HZ)
  607. #define DEMOTE_PERIOD (60 * HZ)
  608. #define HOTSPOT_UPDATE_PERIOD (HZ)
  609. #define CACHE_UPDATE_PERIOD (10u * HZ)
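/*
 * All of the above periods are in jiffies (HZ jiffies per second): the
 * writeback sentinels advance every 10s, the demote sentinels every
 * 60s, the hotspot queue is reassessed every second and the cache
 * queues every 10s.
 */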
  610. struct smq_policy {
  611. struct dm_cache_policy policy;
  612. /* protects everything */
  613. spinlock_t lock;
  614. dm_cblock_t cache_size;
  615. sector_t cache_block_size;
  616. sector_t hotspot_block_size;
  617. unsigned nr_hotspot_blocks;
  618. unsigned cache_blocks_per_hotspot_block;
  619. unsigned hotspot_level_jump;
  620. struct entry_space es;
  621. struct entry_alloc writeback_sentinel_alloc;
  622. struct entry_alloc demote_sentinel_alloc;
  623. struct entry_alloc hotspot_alloc;
  624. struct entry_alloc cache_alloc;
  625. unsigned long *hotspot_hit_bits;
  626. unsigned long *cache_hit_bits;
  627. /*
  628. * We maintain three queues of entries. The cache proper consists of
  629. * a clean queue and a dirty queue, containing the currently active
  630. * mappings. The hotspot queue uses a larger block size to track
  631. * blocks that are being hit frequently and are potential candidates
  632. * for promotion to the cache.
  633. */
  634. struct queue hotspot;
  635. struct queue clean;
  636. struct queue dirty;
  637. struct stats hotspot_stats;
  638. struct stats cache_stats;
  639. /*
  640. * Keeps track of time, incremented by the core. We use this to
  641. * avoid attributing multiple hits within the same tick.
  642. */
  643. unsigned tick;
  644. /*
  645. * The hash tables allow us to quickly find an entry by origin
  646. * block.
  647. */
  648. struct hash_table table;
  649. struct hash_table hotspot_table;
  650. bool current_writeback_sentinels;
  651. unsigned long next_writeback_period;
  652. bool current_demote_sentinels;
  653. unsigned long next_demote_period;
  654. unsigned write_promote_level;
  655. unsigned read_promote_level;
  656. unsigned long next_hotspot_period;
  657. unsigned long next_cache_period;
  658. };
  659. /*----------------------------------------------------------------*/
  660. static struct entry *get_sentinel(struct entry_alloc *ea, unsigned level, bool which)
  661. {
  662. return get_entry(ea, which ? level : NR_CACHE_LEVELS + level);
  663. }
  664. static struct entry *writeback_sentinel(struct smq_policy *mq, unsigned level)
  665. {
  666. return get_sentinel(&mq->writeback_sentinel_alloc, level, mq->current_writeback_sentinels);
  667. }
  668. static struct entry *demote_sentinel(struct smq_policy *mq, unsigned level)
  669. {
  670. return get_sentinel(&mq->demote_sentinel_alloc, level, mq->current_demote_sentinels);
  671. }
  672. static void __update_writeback_sentinels(struct smq_policy *mq)
  673. {
  674. unsigned level;
  675. struct queue *q = &mq->dirty;
  676. struct entry *sentinel;
  677. for (level = 0; level < q->nr_levels; level++) {
  678. sentinel = writeback_sentinel(mq, level);
  679. q_del(q, sentinel);
  680. q_push(q, sentinel);
  681. }
  682. }
  683. static void __update_demote_sentinels(struct smq_policy *mq)
  684. {
  685. unsigned level;
  686. struct queue *q = &mq->clean;
  687. struct entry *sentinel;
  688. for (level = 0; level < q->nr_levels; level++) {
  689. sentinel = demote_sentinel(mq, level);
  690. q_del(q, sentinel);
  691. q_push(q, sentinel);
  692. }
  693. }
  694. static void update_sentinels(struct smq_policy *mq)
  695. {
  696. if (time_after(jiffies, mq->next_writeback_period)) {
  697. __update_writeback_sentinels(mq);
  698. mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
  699. mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
  700. }
  701. if (time_after(jiffies, mq->next_demote_period)) {
  702. __update_demote_sentinels(mq);
  703. mq->next_demote_period = jiffies + DEMOTE_PERIOD;
  704. mq->current_demote_sentinels = !mq->current_demote_sentinels;
  705. }
  706. }
  707. static void __sentinels_init(struct smq_policy *mq)
  708. {
  709. unsigned level;
  710. struct entry *sentinel;
  711. for (level = 0; level < NR_CACHE_LEVELS; level++) {
  712. sentinel = writeback_sentinel(mq, level);
  713. sentinel->level = level;
  714. q_push(&mq->dirty, sentinel);
  715. sentinel = demote_sentinel(mq, level);
  716. sentinel->level = level;
  717. q_push(&mq->clean, sentinel);
  718. }
  719. }
  720. static void sentinels_init(struct smq_policy *mq)
  721. {
  722. mq->next_writeback_period = jiffies + WRITEBACK_PERIOD;
  723. mq->next_demote_period = jiffies + DEMOTE_PERIOD;
  724. mq->current_writeback_sentinels = false;
  725. mq->current_demote_sentinels = false;
  726. __sentinels_init(mq);
  727. mq->current_writeback_sentinels = !mq->current_writeback_sentinels;
  728. mq->current_demote_sentinels = !mq->current_demote_sentinels;
  729. __sentinels_init(mq);
  730. }
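/*
 * Each queue carries two full sets of per-level sentinels;
 * current_*_sentinels selects the set that update_sentinels() will next
 * re-push to the level tails before flipping the flag. Because
 * q_pop_old() will not walk past a sentinel, writeback and demotion
 * only consider entries sitting in front of (i.e. older than) the first
 * sentinel in their level.
 */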
  731. /*----------------------------------------------------------------*/
  732. /*
  733. * These methods tie together the dirty queue, clean queue and hash table.
  734. */
  735. static void push_new(struct smq_policy *mq, struct entry *e)
  736. {
  737. struct queue *q = e->dirty ? &mq->dirty : &mq->clean;
  738. h_insert(&mq->table, e);
  739. q_push(q, e);
  740. }
  741. static void push(struct smq_policy *mq, struct entry *e)
  742. {
  743. struct entry *sentinel;
  744. h_insert(&mq->table, e);
  745. /*
  746. * Punch this into the queue just in front of the sentinel, to
  747. * ensure it's cleaned straight away.
  748. */
  749. if (e->dirty) {
  750. sentinel = writeback_sentinel(mq, e->level);
  751. q_push_before(&mq->dirty, sentinel, e);
  752. } else {
  753. sentinel = demote_sentinel(mq, e->level);
  754. q_push_before(&mq->clean, sentinel, e);
  755. }
  756. }
  757. /*
  758. * Removes an entry from its queue and from the hash table.
  759. */
  760. static void __del(struct smq_policy *mq, struct queue *q, struct entry *e)
  761. {
  762. q_del(q, e);
  763. h_remove(&mq->table, e);
  764. }
  765. static void del(struct smq_policy *mq, struct entry *e)
  766. {
  767. __del(mq, e->dirty ? &mq->dirty : &mq->clean, e);
  768. }
  769. static struct entry *pop_old(struct smq_policy *mq, struct queue *q, unsigned max_level)
  770. {
  771. struct entry *e = q_pop_old(q, max_level);
  772. if (e)
  773. h_remove(&mq->table, e);
  774. return e;
  775. }
  776. static dm_cblock_t infer_cblock(struct smq_policy *mq, struct entry *e)
  777. {
  778. return to_cblock(get_index(&mq->cache_alloc, e));
  779. }
  780. static void requeue(struct smq_policy *mq, struct entry *e)
  781. {
  782. struct entry *sentinel;
  783. if (!test_and_set_bit(from_cblock(infer_cblock(mq, e)), mq->cache_hit_bits)) {
  784. if (e->dirty) {
  785. sentinel = writeback_sentinel(mq, e->level);
  786. q_requeue_before(&mq->dirty, sentinel, e, 1u);
  787. } else {
  788. sentinel = demote_sentinel(mq, e->level);
  789. q_requeue_before(&mq->clean, sentinel, e, 1u);
  790. }
  791. }
  792. }
  793. static unsigned default_promote_level(struct smq_policy *mq)
  794. {
  795. /*
  796. * The promote level depends on the current performance of the
  797. * cache.
  798. *
  799. * If the cache is performing badly, then we can't afford
  800. * to promote much without causing performance to drop below that
  801. * of the origin device.
  802. *
  803. * If the cache is performing well, then we don't need to promote
  804. * much. If it isn't broken, don't fix it.
  805. *
  806. * If the cache is middling then we promote more.
  807. *
  808. * This scheme reminds me of a graph of entropy vs probability of a
  809. * binary variable.
  810. */
  811. static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1};
  812. unsigned hits = mq->cache_stats.hits;
  813. unsigned misses = mq->cache_stats.misses;
  814. unsigned index = safe_div(hits << 4u, hits + misses);
  815. return table[index];
  816. }
  817. static void update_promote_levels(struct smq_policy *mq)
  818. {
  819. /*
  820. * If there are unused cache entries then we want to be really
  821. * eager to promote.
  822. */
  823. unsigned threshold_level = allocator_empty(&mq->cache_alloc) ?
  824. default_promote_level(mq) : (NR_HOTSPOT_LEVELS / 2u);
  825. /*
  826. * If the hotspot queue is performing badly then we have little
  827. * confidence that we know which blocks to promote. So we cut down
  828. * the amount of promotions.
  829. */
  830. switch (stats_assess(&mq->hotspot_stats)) {
  831. case Q_POOR:
  832. threshold_level /= 4u;
  833. break;
  834. case Q_FAIR:
  835. threshold_level /= 2u;
  836. break;
  837. case Q_WELL:
  838. break;
  839. }
  840. mq->read_promote_level = NR_HOTSPOT_LEVELS - threshold_level;
  841. mq->write_promote_level = (NR_HOTSPOT_LEVELS - threshold_level) + 2u;
  842. }
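/*
 * threshold_level above is the number of top hotspot levels that
 * qualify for promotion: half of NR_HOTSPOT_LEVELS while unused cache
 * entries remain, otherwise whatever default_promote_level() returns,
 * shrunk further when confidence in the hotspot queue is low. The
 * resulting read/write_promote_level values are the minimum hotspot
 * level an entry must have reached, with writes held to a slightly
 * higher bar.
 */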
  843. /*
  844. * If the hotspot queue is performing badly, then we try and move entries
  845. * around more quickly.
  846. */
  847. static void update_level_jump(struct smq_policy *mq)
  848. {
  849. switch (stats_assess(&mq->hotspot_stats)) {
  850. case Q_POOR:
  851. mq->hotspot_level_jump = 4u;
  852. break;
  853. case Q_FAIR:
  854. mq->hotspot_level_jump = 2u;
  855. break;
  856. case Q_WELL:
  857. mq->hotspot_level_jump = 1u;
  858. break;
  859. }
  860. }
  861. static void end_hotspot_period(struct smq_policy *mq)
  862. {
  863. clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
  864. update_promote_levels(mq);
  865. if (time_after(jiffies, mq->next_hotspot_period)) {
  866. update_level_jump(mq);
  867. q_redistribute(&mq->hotspot);
  868. stats_reset(&mq->hotspot_stats);
  869. mq->next_hotspot_period = jiffies + HOTSPOT_UPDATE_PERIOD;
  870. }
  871. }
  872. static void end_cache_period(struct smq_policy *mq)
  873. {
  874. if (time_after(jiffies, mq->next_cache_period)) {
  875. clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
  876. q_redistribute(&mq->dirty);
  877. q_redistribute(&mq->clean);
  878. stats_reset(&mq->cache_stats);
  879. mq->next_cache_period = jiffies + CACHE_UPDATE_PERIOD;
  880. }
  881. }
  882. static int demote_cblock(struct smq_policy *mq,
  883. struct policy_locker *locker,
  884. dm_oblock_t *oblock)
  885. {
  886. struct entry *demoted = q_peek(&mq->clean, mq->clean.nr_levels, false);
  887. if (!demoted)
  888. /*
  889. * We could get a block from mq->dirty, but that
  890. * would add extra latency to the triggering bio as it
  891. * waits for the writeback. Better to not promote this
  892. * time and hope there's a clean block next time this block
  893. * is hit.
  894. */
  895. return -ENOSPC;
  896. if (locker->fn(locker, demoted->oblock))
  897. /*
  898. * We couldn't lock this block.
  899. */
  900. return -EBUSY;
  901. del(mq, demoted);
  902. *oblock = demoted->oblock;
  903. free_entry(&mq->cache_alloc, demoted);
  904. return 0;
  905. }
  906. enum promote_result {
  907. PROMOTE_NOT,
  908. PROMOTE_TEMPORARY,
  909. PROMOTE_PERMANENT
  910. };
  911. /*
  912. * Converts a boolean into a promote result.
  913. */
  914. static enum promote_result maybe_promote(bool promote)
  915. {
  916. return promote ? PROMOTE_PERMANENT : PROMOTE_NOT;
  917. }
  918. static enum promote_result should_promote(struct smq_policy *mq, struct entry *hs_e, struct bio *bio,
  919. bool fast_promote)
  920. {
  921. if (bio_data_dir(bio) == WRITE) {
  922. if (!allocator_empty(&mq->cache_alloc) && fast_promote)
  923. return PROMOTE_TEMPORARY;
  924. else
  925. return maybe_promote(hs_e->level >= mq->write_promote_level);
  926. } else
  927. return maybe_promote(hs_e->level >= mq->read_promote_level);
  928. }
  929. static void insert_in_cache(struct smq_policy *mq, dm_oblock_t oblock,
  930. struct policy_locker *locker,
  931. struct policy_result *result, enum promote_result pr)
  932. {
  933. int r;
  934. struct entry *e;
  935. if (allocator_empty(&mq->cache_alloc)) {
  936. result->op = POLICY_REPLACE;
  937. r = demote_cblock(mq, locker, &result->old_oblock);
  938. if (r) {
  939. result->op = POLICY_MISS;
  940. return;
  941. }
  942. } else
  943. result->op = POLICY_NEW;
  944. e = alloc_entry(&mq->cache_alloc);
  945. BUG_ON(!e);
  946. e->oblock = oblock;
  947. if (pr == PROMOTE_TEMPORARY)
  948. push(mq, e);
  949. else
  950. push_new(mq, e);
  951. result->cblock = infer_cblock(mq, e);
  952. }
  953. static dm_oblock_t to_hblock(struct smq_policy *mq, dm_oblock_t b)
  954. {
  955. sector_t r = from_oblock(b);
  956. (void) sector_div(r, mq->cache_blocks_per_hotspot_block);
  957. return to_oblock(r);
  958. }
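/*
 * to_hblock() maps an origin block onto the coarser hotspot grid by
 * dividing by cache_blocks_per_hotspot_block, so a single hotspot entry
 * summarises the hit activity of a run of adjacent cache-block-sized
 * regions.
 */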
  959. static struct entry *update_hotspot_queue(struct smq_policy *mq, dm_oblock_t b, struct bio *bio)
  960. {
  961. unsigned hi;
  962. dm_oblock_t hb = to_hblock(mq, b);
  963. struct entry *e = h_lookup(&mq->hotspot_table, hb);
  964. if (e) {
  965. stats_level_accessed(&mq->hotspot_stats, e->level);
  966. hi = get_index(&mq->hotspot_alloc, e);
  967. q_requeue(&mq->hotspot, e,
  968. test_and_set_bit(hi, mq->hotspot_hit_bits) ?
  969. 0u : mq->hotspot_level_jump);
  970. } else {
  971. stats_miss(&mq->hotspot_stats);
  972. e = alloc_entry(&mq->hotspot_alloc);
  973. if (!e) {
  974. e = q_pop(&mq->hotspot);
  975. if (e) {
  976. h_remove(&mq->hotspot_table, e);
  977. hi = get_index(&mq->hotspot_alloc, e);
  978. clear_bit(hi, mq->hotspot_hit_bits);
  979. }
  980. }
  981. if (e) {
  982. e->oblock = hb;
  983. q_push(&mq->hotspot, e);
  984. h_insert(&mq->hotspot_table, e);
  985. }
  986. }
  987. return e;
  988. }
  989. /*
  990. * Looks the oblock up in the hash table, then decides whether it is a
  991. * hit, a plain miss, or a miss that should trigger promotion into the cache.
  992. */
  993. static int map(struct smq_policy *mq, struct bio *bio, dm_oblock_t oblock,
  994. bool can_migrate, bool fast_promote,
  995. struct policy_locker *locker, struct policy_result *result)
  996. {
  997. struct entry *e, *hs_e;
  998. enum promote_result pr;
  999. hs_e = update_hotspot_queue(mq, oblock, bio);
  1000. e = h_lookup(&mq->table, oblock);
  1001. if (e) {
  1002. stats_level_accessed(&mq->cache_stats, e->level);
  1003. requeue(mq, e);
  1004. result->op = POLICY_HIT;
  1005. result->cblock = infer_cblock(mq, e);
  1006. } else {
  1007. stats_miss(&mq->cache_stats);
  1008. pr = should_promote(mq, hs_e, bio, fast_promote);
  1009. if (pr == PROMOTE_NOT)
  1010. result->op = POLICY_MISS;
  1011. else {
  1012. if (!can_migrate) {
  1013. result->op = POLICY_MISS;
  1014. return -EWOULDBLOCK;
  1015. }
  1016. insert_in_cache(mq, oblock, locker, result, pr);
  1017. }
  1018. }
  1019. return 0;
  1020. }
  1021. /*----------------------------------------------------------------*/
  1022. /*
  1023. * Public interface, via the policy struct. See dm-cache-policy.h for a
  1024. * description of these.
  1025. */
  1026. static struct smq_policy *to_smq_policy(struct dm_cache_policy *p)
  1027. {
  1028. return container_of(p, struct smq_policy, policy);
  1029. }
  1030. static void smq_destroy(struct dm_cache_policy *p)
  1031. {
  1032. struct smq_policy *mq = to_smq_policy(p);
  1033. h_exit(&mq->hotspot_table);
  1034. h_exit(&mq->table);
  1035. free_bitset(mq->hotspot_hit_bits);
  1036. free_bitset(mq->cache_hit_bits);
  1037. space_exit(&mq->es);
  1038. kfree(mq);
  1039. }
  1040. static int smq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
  1041. bool can_block, bool can_migrate, bool fast_promote,
  1042. struct bio *bio, struct policy_locker *locker,
  1043. struct policy_result *result)
  1044. {
  1045. int r;
  1046. unsigned long flags;
  1047. struct smq_policy *mq = to_smq_policy(p);
  1048. result->op = POLICY_MISS;
  1049. spin_lock_irqsave(&mq->lock, flags);
  1050. r = map(mq, bio, oblock, can_migrate, fast_promote, locker, result);
  1051. spin_unlock_irqrestore(&mq->lock, flags);
  1052. return r;
  1053. }
  1054. static int smq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
  1055. {
  1056. int r;
  1057. unsigned long flags;
  1058. struct smq_policy *mq = to_smq_policy(p);
  1059. struct entry *e;
  1060. spin_lock_irqsave(&mq->lock, flags);
  1061. e = h_lookup(&mq->table, oblock);
  1062. if (e) {
  1063. *cblock = infer_cblock(mq, e);
  1064. r = 0;
  1065. } else
  1066. r = -ENOENT;
  1067. spin_unlock_irqrestore(&mq->lock, flags);
  1068. return r;
  1069. }
  1070. static void __smq_set_clear_dirty(struct smq_policy *mq, dm_oblock_t oblock, bool set)
  1071. {
  1072. struct entry *e;
  1073. e = h_lookup(&mq->table, oblock);
  1074. BUG_ON(!e);
  1075. del(mq, e);
  1076. e->dirty = set;
  1077. push(mq, e);
  1078. }
  1079. static void smq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
  1080. {
  1081. unsigned long flags;
  1082. struct smq_policy *mq = to_smq_policy(p);
  1083. spin_lock_irqsave(&mq->lock, flags);
  1084. __smq_set_clear_dirty(mq, oblock, true);
  1085. spin_unlock_irqrestore(&mq->lock, flags);
  1086. }
  1087. static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
  1088. {
  1089. struct smq_policy *mq = to_smq_policy(p);
  1090. unsigned long flags;
  1091. spin_lock_irqsave(&mq->lock, flags);
  1092. __smq_set_clear_dirty(mq, oblock, false);
  1093. spin_unlock_irqrestore(&mq->lock, flags);
  1094. }
  1095. static unsigned random_level(dm_cblock_t cblock)
  1096. {
  1097. return hash_32_generic(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
  1098. }
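/*
 * Mappings loaded without a valid hint get a pseudo-random level so
 * that a freshly loaded cache does not start with every entry crammed
 * into a single level.
 */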
  1099. static int smq_load_mapping(struct dm_cache_policy *p,
  1100. dm_oblock_t oblock, dm_cblock_t cblock,
  1101. uint32_t hint, bool hint_valid)
  1102. {
  1103. struct smq_policy *mq = to_smq_policy(p);
  1104. struct entry *e;
  1105. e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
  1106. e->oblock = oblock;
  1107. e->dirty = false; /* this gets corrected in a minute */
  1108. e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
  1109. push(mq, e);
  1110. return 0;
  1111. }
  1112. static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
  1113. {
  1114. struct smq_policy *mq = to_smq_policy(p);
  1115. struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
  1116. if (!e->allocated)
  1117. return 0;
  1118. return e->level;
  1119. }
  1120. static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
  1121. {
  1122. struct entry *e;
  1123. e = h_lookup(&mq->table, oblock);
  1124. BUG_ON(!e);
  1125. del(mq, e);
  1126. free_entry(&mq->cache_alloc, e);
  1127. }
  1128. static void smq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
  1129. {
  1130. struct smq_policy *mq = to_smq_policy(p);
  1131. unsigned long flags;
  1132. spin_lock_irqsave(&mq->lock, flags);
  1133. __remove_mapping(mq, oblock);
  1134. spin_unlock_irqrestore(&mq->lock, flags);
  1135. }
  1136. static int __remove_cblock(struct smq_policy *mq, dm_cblock_t cblock)
  1137. {
  1138. struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
  1139. if (!e || !e->allocated)
  1140. return -ENODATA;
  1141. del(mq, e);
  1142. free_entry(&mq->cache_alloc, e);
  1143. return 0;
  1144. }
  1145. static int smq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
  1146. {
  1147. int r;
  1148. unsigned long flags;
  1149. struct smq_policy *mq = to_smq_policy(p);
  1150. spin_lock_irqsave(&mq->lock, flags);
  1151. r = __remove_cblock(mq, cblock);
  1152. spin_unlock_irqrestore(&mq->lock, flags);
  1153. return r;
  1154. }
  1155. #define CLEAN_TARGET_CRITICAL 5u /* percent */
  1156. static bool clean_target_met(struct smq_policy *mq, bool critical)
  1157. {
  1158. if (critical) {
  1159. /*
  1160. * Cache entries may not be populated, so we cannot rely on the
  1161. * size of the clean queue.
  1162. */
  1163. unsigned nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
  1164. unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_CRITICAL / 100u;
  1165. return nr_clean >= target;
  1166. } else
  1167. return !q_size(&mq->dirty);
  1168. }
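/*
 * For example, with CLEAN_TARGET_CRITICAL at 5% a 10000-block cache
 * meets the critical target once at least 500 blocks are not on the
 * dirty queue; in the non-critical case the target is only met when the
 * dirty queue is completely empty.
 */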
  1169. static int __smq_writeback_work(struct smq_policy *mq, dm_oblock_t *oblock,
  1170. dm_cblock_t *cblock, bool critical_only)
  1171. {
  1172. struct entry *e = NULL;
  1173. bool target_met = clean_target_met(mq, critical_only);
  1174. if (critical_only)
  1175. /*
  1176. * Always try to keep the bottom level clean.
  1177. */
  1178. e = pop_old(mq, &mq->dirty, target_met ? 1u : mq->dirty.nr_levels);
  1179. else
  1180. e = pop_old(mq, &mq->dirty, mq->dirty.nr_levels);
  1181. if (!e)
  1182. return -ENODATA;
  1183. *oblock = e->oblock;
  1184. *cblock = infer_cblock(mq, e);
  1185. e->dirty = false;
  1186. push_new(mq, e);
  1187. return 0;
  1188. }
  1189. static int smq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
  1190. dm_cblock_t *cblock, bool critical_only)
  1191. {
  1192. int r;
  1193. unsigned long flags;
  1194. struct smq_policy *mq = to_smq_policy(p);
  1195. spin_lock_irqsave(&mq->lock, flags);
  1196. r = __smq_writeback_work(mq, oblock, cblock, critical_only);
  1197. spin_unlock_irqrestore(&mq->lock, flags);
  1198. return r;
  1199. }
  1200. static void __force_mapping(struct smq_policy *mq,
  1201. dm_oblock_t current_oblock, dm_oblock_t new_oblock)
  1202. {
  1203. struct entry *e = h_lookup(&mq->table, current_oblock);
  1204. if (e) {
  1205. del(mq, e);
  1206. e->oblock = new_oblock;
  1207. e->dirty = true;
  1208. push(mq, e);
  1209. }
  1210. }
  1211. static void smq_force_mapping(struct dm_cache_policy *p,
  1212. dm_oblock_t current_oblock, dm_oblock_t new_oblock)
  1213. {
  1214. unsigned long flags;
  1215. struct smq_policy *mq = to_smq_policy(p);
  1216. spin_lock_irqsave(&mq->lock, flags);
  1217. __force_mapping(mq, current_oblock, new_oblock);
  1218. spin_unlock_irqrestore(&mq->lock, flags);
  1219. }
  1220. static dm_cblock_t smq_residency(struct dm_cache_policy *p)
  1221. {
  1222. dm_cblock_t r;
  1223. unsigned long flags;
  1224. struct smq_policy *mq = to_smq_policy(p);
  1225. spin_lock_irqsave(&mq->lock, flags);
  1226. r = to_cblock(mq->cache_alloc.nr_allocated);
  1227. spin_unlock_irqrestore(&mq->lock, flags);
  1228. return r;
  1229. }
  1230. static void smq_tick(struct dm_cache_policy *p, bool can_block)
  1231. {
  1232. struct smq_policy *mq = to_smq_policy(p);
  1233. unsigned long flags;
  1234. spin_lock_irqsave(&mq->lock, flags);
  1235. mq->tick++;
  1236. update_sentinels(mq);
  1237. end_hotspot_period(mq);
  1238. end_cache_period(mq);
  1239. spin_unlock_irqrestore(&mq->lock, flags);
  1240. }
  1241. /*
  1242. * smq has no config values, but the old mq policy did. To avoid breaking
  1243. * software we continue to accept these configurables for the mq policy,
  1244. * but they have no effect.
  1245. */
  1246. static int mq_set_config_value(struct dm_cache_policy *p,
  1247. const char *key, const char *value)
  1248. {
  1249. unsigned long tmp;
  1250. if (kstrtoul(value, 10, &tmp))
  1251. return -EINVAL;
  1252. if (!strcasecmp(key, "random_threshold") ||
  1253. !strcasecmp(key, "sequential_threshold") ||
  1254. !strcasecmp(key, "discard_promote_adjustment") ||
  1255. !strcasecmp(key, "read_promote_adjustment") ||
  1256. !strcasecmp(key, "write_promote_adjustment")) {
  1257. DMWARN("tunable '%s' no longer has any effect, mq policy is now an alias for smq", key);
  1258. return 0;
  1259. }
  1260. return -EINVAL;
  1261. }
  1262. static int mq_emit_config_values(struct dm_cache_policy *p, char *result,
  1263. unsigned maxlen, ssize_t *sz_ptr)
  1264. {
  1265. ssize_t sz = *sz_ptr;
  1266. DMEMIT("10 random_threshold 0 "
  1267. "sequential_threshold 0 "
  1268. "discard_promote_adjustment 0 "
  1269. "read_promote_adjustment 0 "
  1270. "write_promote_adjustment 0 ");
  1271. *sz_ptr = sz;
  1272. return 0;
  1273. }
  1274. /* Init the policy plugin interface function pointers. */
  1275. static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
  1276. {
  1277. mq->policy.destroy = smq_destroy;
  1278. mq->policy.map = smq_map;
  1279. mq->policy.lookup = smq_lookup;
  1280. mq->policy.set_dirty = smq_set_dirty;
  1281. mq->policy.clear_dirty = smq_clear_dirty;
  1282. mq->policy.load_mapping = smq_load_mapping;
  1283. mq->policy.get_hint = smq_get_hint;
  1284. mq->policy.remove_mapping = smq_remove_mapping;
  1285. mq->policy.remove_cblock = smq_remove_cblock;
  1286. mq->policy.writeback_work = smq_writeback_work;
  1287. mq->policy.force_mapping = smq_force_mapping;
  1288. mq->policy.residency = smq_residency;
  1289. mq->policy.tick = smq_tick;
  1290. if (mimic_mq) {
  1291. mq->policy.set_config_value = mq_set_config_value;
  1292. mq->policy.emit_config_values = mq_emit_config_values;
  1293. }
  1294. }
  1295. static bool too_many_hotspot_blocks(sector_t origin_size,
  1296. sector_t hotspot_block_size,
  1297. unsigned nr_hotspot_blocks)
  1298. {
  1299. return (hotspot_block_size * nr_hotspot_blocks) > origin_size;
  1300. }
  1301. static void calc_hotspot_params(sector_t origin_size,
  1302. sector_t cache_block_size,
  1303. unsigned nr_cache_blocks,
  1304. sector_t *hotspot_block_size,
  1305. unsigned *nr_hotspot_blocks)
  1306. {
  1307. *hotspot_block_size = cache_block_size * 16u;
  1308. *nr_hotspot_blocks = max(nr_cache_blocks / 4u, 1024u);
  1309. while ((*hotspot_block_size > cache_block_size) &&
  1310. too_many_hotspot_blocks(origin_size, *hotspot_block_size, *nr_hotspot_blocks))
  1311. *hotspot_block_size /= 2u;
  1312. }
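/*
 * As an illustration: with 64KiB cache blocks (128 sectors) the hotspot
 * block size starts at 1MiB and nr_hotspot_blocks at a quarter of the
 * cache blocks (minimum 1024); the block size is then halved, but never
 * below the cache block size, until the hotspot region no longer
 * exceeds the origin device.
 */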
  1313. static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
  1314. sector_t origin_size,
  1315. sector_t cache_block_size,
  1316. bool mimic_mq)
  1317. {
  1318. unsigned i;
  1319. unsigned nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
  1320. unsigned total_sentinels = 2u * nr_sentinels_per_queue;
  1321. struct smq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);
  1322. if (!mq)
  1323. return NULL;
  1324. init_policy_functions(mq, mimic_mq);
  1325. mq->cache_size = cache_size;
  1326. mq->cache_block_size = cache_block_size;
  1327. calc_hotspot_params(origin_size, cache_block_size, from_cblock(cache_size),
  1328. &mq->hotspot_block_size, &mq->nr_hotspot_blocks);
  1329. mq->cache_blocks_per_hotspot_block = div64_u64(mq->hotspot_block_size, mq->cache_block_size);
  1330. mq->hotspot_level_jump = 1u;
  1331. if (space_init(&mq->es, total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size))) {
  1332. DMERR("couldn't initialize entry space");
  1333. goto bad_pool_init;
  1334. }
  1335. init_allocator(&mq->writeback_sentinel_alloc, &mq->es, 0, nr_sentinels_per_queue);
  1336. for (i = 0; i < nr_sentinels_per_queue; i++)
  1337. get_entry(&mq->writeback_sentinel_alloc, i)->sentinel = true;
  1338. init_allocator(&mq->demote_sentinel_alloc, &mq->es, nr_sentinels_per_queue, total_sentinels);
  1339. for (i = 0; i < nr_sentinels_per_queue; i++)
  1340. get_entry(&mq->demote_sentinel_alloc, i)->sentinel = true;
  1341. init_allocator(&mq->hotspot_alloc, &mq->es, total_sentinels,
  1342. total_sentinels + mq->nr_hotspot_blocks);
  1343. init_allocator(&mq->cache_alloc, &mq->es,
  1344. total_sentinels + mq->nr_hotspot_blocks,
  1345. total_sentinels + mq->nr_hotspot_blocks + from_cblock(cache_size));
  1346. mq->hotspot_hit_bits = alloc_bitset(mq->nr_hotspot_blocks);
  1347. if (!mq->hotspot_hit_bits) {
  1348. DMERR("couldn't allocate hotspot hit bitset");
  1349. goto bad_hotspot_hit_bits;
  1350. }
  1351. clear_bitset(mq->hotspot_hit_bits, mq->nr_hotspot_blocks);
  1352. if (from_cblock(cache_size)) {
  1353. mq->cache_hit_bits = alloc_bitset(from_cblock(cache_size));
  1354. if (!mq->cache_hit_bits) {
  1355. DMERR("couldn't allocate cache hit bitset");
  1356. goto bad_cache_hit_bits;
  1357. }
  1358. clear_bitset(mq->cache_hit_bits, from_cblock(mq->cache_size));
  1359. } else
  1360. mq->cache_hit_bits = NULL;
  1361. mq->tick = 0;
  1362. spin_lock_init(&mq->lock);
  1363. q_init(&mq->hotspot, &mq->es, NR_HOTSPOT_LEVELS);
  1364. mq->hotspot.nr_top_levels = 8;
  1365. mq->hotspot.nr_in_top_levels = min(mq->nr_hotspot_blocks / NR_HOTSPOT_LEVELS,
  1366. from_cblock(mq->cache_size) / mq->cache_blocks_per_hotspot_block);
  1367. q_init(&mq->clean, &mq->es, NR_CACHE_LEVELS);
  1368. q_init(&mq->dirty, &mq->es, NR_CACHE_LEVELS);
  1369. stats_init(&mq->hotspot_stats, NR_HOTSPOT_LEVELS);
  1370. stats_init(&mq->cache_stats, NR_CACHE_LEVELS);
  1371. if (h_init(&mq->table, &mq->es, from_cblock(cache_size)))
  1372. goto bad_alloc_table;
  1373. if (h_init(&mq->hotspot_table, &mq->es, mq->nr_hotspot_blocks))
  1374. goto bad_alloc_hotspot_table;
  1375. sentinels_init(mq);
  1376. mq->write_promote_level = mq->read_promote_level = NR_HOTSPOT_LEVELS;
  1377. mq->next_hotspot_period = jiffies;
  1378. mq->next_cache_period = jiffies;
  1379. return &mq->policy;
  1380. bad_alloc_hotspot_table:
  1381. h_exit(&mq->table);
  1382. bad_alloc_table:
  1383. free_bitset(mq->cache_hit_bits);
  1384. bad_cache_hit_bits:
  1385. free_bitset(mq->hotspot_hit_bits);
  1386. bad_hotspot_hit_bits:
  1387. space_exit(&mq->es);
  1388. bad_pool_init:
  1389. kfree(mq);
  1390. return NULL;
  1391. }
  1392. static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
  1393. sector_t origin_size,
  1394. sector_t cache_block_size)
  1395. {
  1396. return __smq_create(cache_size, origin_size, cache_block_size, false);
  1397. }
  1398. static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
  1399. sector_t origin_size,
  1400. sector_t cache_block_size)
  1401. {
  1402. return __smq_create(cache_size, origin_size, cache_block_size, true);
  1403. }
  1404. /*----------------------------------------------------------------*/
  1405. static struct dm_cache_policy_type smq_policy_type = {
  1406. .name = "smq",
  1407. .version = {1, 5, 0},
  1408. .hint_size = 4,
  1409. .owner = THIS_MODULE,
  1410. .create = smq_create
  1411. };
  1412. static struct dm_cache_policy_type mq_policy_type = {
  1413. .name = "mq",
  1414. .version = {1, 5, 0},
  1415. .hint_size = 4,
  1416. .owner = THIS_MODULE,
  1417. .create = mq_create,
  1418. };
  1419. static struct dm_cache_policy_type default_policy_type = {
  1420. .name = "default",
  1421. .version = {1, 5, 0},
  1422. .hint_size = 4,
  1423. .owner = THIS_MODULE,
  1424. .create = smq_create,
  1425. .real = &smq_policy_type
  1426. };
  1427. static int __init smq_init(void)
  1428. {
  1429. int r;
  1430. r = dm_cache_policy_register(&smq_policy_type);
  1431. if (r) {
  1432. DMERR("register failed %d", r);
  1433. return -ENOMEM;
  1434. }
  1435. r = dm_cache_policy_register(&mq_policy_type);
  1436. if (r) {
  1437. DMERR("register failed (as mq) %d", r);
  1438. dm_cache_policy_unregister(&smq_policy_type);
  1439. return -ENOMEM;
  1440. }
  1441. r = dm_cache_policy_register(&default_policy_type);
  1442. if (r) {
  1443. DMERR("register failed (as default) %d", r);
  1444. dm_cache_policy_unregister(&mq_policy_type);
  1445. dm_cache_policy_unregister(&smq_policy_type);
  1446. return -ENOMEM;
  1447. }
  1448. return 0;
  1449. }
  1450. static void __exit smq_exit(void)
  1451. {
  1452. dm_cache_policy_unregister(&smq_policy_type);
  1453. dm_cache_policy_unregister(&mq_policy_type);
  1454. dm_cache_policy_unregister(&default_policy_type);
  1455. }
  1456. module_init(smq_init);
  1457. module_exit(smq_exit);
  1458. MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
  1459. MODULE_LICENSE("GPL");
  1460. MODULE_DESCRIPTION("smq cache policy");
  1461. MODULE_ALIAS("dm-cache-default");
  1462. MODULE_ALIAS("dm-cache-mq");