/* lru_cache.c */
  1. /*
  2. lru_cache.c
  3. This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
  4. Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
  5. Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
  6. Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
  7. drbd is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 2, or (at your option)
  10. any later version.
  11. drbd is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with drbd; see the file COPYING. If not, write to
  17. the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  18. */
  19. #include <linux/module.h>
  20. #include <linux/bitops.h>
  21. #include <linux/slab.h>
  22. #include <linux/string.h> /* for memset */
  23. #include <linux/seq_file.h> /* for seq_printf */
  24. #include <linux/lru_cache.h>
  25. MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
  26. "Lars Ellenberg <lars@linbit.com>");
  27. MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
  28. MODULE_LICENSE("GPL");
/* this is developers aid only.
 * it catches concurrent access (lack of locking on the users part) */
/* Entry guard: trips BUG() if a second context enters while the paranoia
 * bit is still set, i.e. the caller failed to serialize access to @lc. */
#define PARANOIA_ENTRY() do { \
	BUG_ON(!lc); \
	BUG_ON(!lc->nr_elements); \
	BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \
} while (0)

/* Exit counterpart of PARANOIA_ENTRY(): drops the paranoia bit (with a
 * barrier so the release is visible before the return value is used),
 * then returns x from the enclosing function. */
#define RETURN(x...) do { \
	clear_bit(__LC_PARANOIA, &lc->flags); \
	smp_mb__after_clear_bit(); return x ; } while (0)

/* BUG() if e is not one of the elements tracked by lc */
#define PARANOIA_LC_ELEMENT(lc, e) do { \
	struct lru_cache *lc_ = (lc); \
	struct lc_element *e_ = (e); \
	unsigned i = e_->lc_index; \
	BUG_ON(i >= lc_->nr_elements); \
	BUG_ON(lc_->lc_element[i] != e_); } while (0)
  46. /**
  47. * lc_create - prepares to track objects in an active set
  48. * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
  49. * @e_count: number of elements allowed to be active simultaneously
  50. * @e_size: size of the tracked objects
  51. * @e_off: offset to the &struct lc_element member in a tracked object
  52. *
  53. * Returns a pointer to a newly initialized struct lru_cache on success,
  54. * or NULL on (allocation) failure.
  55. */
  56. struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
  57. unsigned e_count, size_t e_size, size_t e_off)
  58. {
  59. struct hlist_head *slot = NULL;
  60. struct lc_element **element = NULL;
  61. struct lru_cache *lc;
  62. struct lc_element *e;
  63. unsigned cache_obj_size = kmem_cache_size(cache);
  64. unsigned i;
  65. WARN_ON(cache_obj_size < e_size);
  66. if (cache_obj_size < e_size)
  67. return NULL;
  68. /* e_count too big; would probably fail the allocation below anyways.
  69. * for typical use cases, e_count should be few thousand at most. */
  70. if (e_count > LC_MAX_ACTIVE)
  71. return NULL;
  72. slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL);
  73. if (!slot)
  74. goto out_fail;
  75. element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
  76. if (!element)
  77. goto out_fail;
  78. lc = kzalloc(sizeof(*lc), GFP_KERNEL);
  79. if (!lc)
  80. goto out_fail;
  81. INIT_LIST_HEAD(&lc->in_use);
  82. INIT_LIST_HEAD(&lc->lru);
  83. INIT_LIST_HEAD(&lc->free);
  84. lc->name = name;
  85. lc->element_size = e_size;
  86. lc->element_off = e_off;
  87. lc->nr_elements = e_count;
  88. lc->new_number = LC_FREE;
  89. lc->lc_cache = cache;
  90. lc->lc_element = element;
  91. lc->lc_slot = slot;
  92. /* preallocate all objects */
  93. for (i = 0; i < e_count; i++) {
  94. void *p = kmem_cache_alloc(cache, GFP_KERNEL);
  95. if (!p)
  96. break;
  97. memset(p, 0, lc->element_size);
  98. e = p + e_off;
  99. e->lc_index = i;
  100. e->lc_number = LC_FREE;
  101. list_add(&e->list, &lc->free);
  102. element[i] = e;
  103. }
  104. if (i == e_count)
  105. return lc;
  106. /* else: could not allocate all elements, give up */
  107. for (i--; i; i--) {
  108. void *p = element[i];
  109. kmem_cache_free(cache, p - e_off);
  110. }
  111. kfree(lc);
  112. out_fail:
  113. kfree(element);
  114. kfree(slot);
  115. return NULL;
  116. }
  117. void lc_free_by_index(struct lru_cache *lc, unsigned i)
  118. {
  119. void *p = lc->lc_element[i];
  120. WARN_ON(!p);
  121. if (p) {
  122. p -= lc->element_off;
  123. kmem_cache_free(lc->lc_cache, p);
  124. }
  125. }
  126. /**
  127. * lc_destroy - frees memory allocated by lc_create()
  128. * @lc: the lru cache to destroy
  129. */
  130. void lc_destroy(struct lru_cache *lc)
  131. {
  132. unsigned i;
  133. if (!lc)
  134. return;
  135. for (i = 0; i < lc->nr_elements; i++)
  136. lc_free_by_index(lc, i);
  137. kfree(lc->lc_element);
  138. kfree(lc->lc_slot);
  139. kfree(lc);
  140. }
  141. /**
  142. * lc_reset - does a full reset for @lc and the hash table slots.
  143. * @lc: the lru cache to operate on
  144. *
  145. * It is roughly the equivalent of re-allocating a fresh lru_cache object,
  146. * basically a short cut to lc_destroy(lc); lc = lc_create(...);
  147. */
  148. void lc_reset(struct lru_cache *lc)
  149. {
  150. unsigned i;
  151. INIT_LIST_HEAD(&lc->in_use);
  152. INIT_LIST_HEAD(&lc->lru);
  153. INIT_LIST_HEAD(&lc->free);
  154. lc->used = 0;
  155. lc->hits = 0;
  156. lc->misses = 0;
  157. lc->starving = 0;
  158. lc->dirty = 0;
  159. lc->changed = 0;
  160. lc->flags = 0;
  161. lc->changing_element = NULL;
  162. lc->new_number = LC_FREE;
  163. memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
  164. for (i = 0; i < lc->nr_elements; i++) {
  165. struct lc_element *e = lc->lc_element[i];
  166. void *p = e;
  167. p -= lc->element_off;
  168. memset(p, 0, lc->element_size);
  169. /* re-init it */
  170. e->lc_index = i;
  171. e->lc_number = LC_FREE;
  172. list_add(&e->list, &lc->free);
  173. }
  174. }
/**
 * lc_seq_printf_stats - print stats about @lc into @seq
 * @seq: the seq_file to print into
 * @lc: the lru cache to print statistics of
 *
 * Returns whatever seq_printf() returns in this kernel version.
 */
size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
{
	/* NOTE:
	 * total calls to lc_get are
	 * (starving + hits + misses)
	 * misses include "dirty" count (update from an other thread in
	 * progress) and "changed", when this in fact lead to an successful
	 * update of the cache.
	 */
	return seq_printf(seq, "\t%s: used:%u/%u "
		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
		lc->name, lc->used, lc->nr_elements,
		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
}
  194. static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
  195. {
  196. return lc->lc_slot + (enr % lc->nr_elements);
  197. }
  198. /**
  199. * lc_find - find element by label, if present in the hash table
  200. * @lc: The lru_cache object
  201. * @enr: element number
  202. *
  203. * Returns the pointer to an element, if the element with the requested
  204. * "label" or element number is present in the hash table,
  205. * or NULL if not found. Does not change the refcnt.
  206. */
  207. struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
  208. {
  209. struct hlist_node *n;
  210. struct lc_element *e;
  211. BUG_ON(!lc);
  212. BUG_ON(!lc->nr_elements);
  213. hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
  214. if (e->lc_number == enr)
  215. return e;
  216. }
  217. return NULL;
  218. }
  219. /* returned element will be "recycled" immediately */
  220. static struct lc_element *lc_evict(struct lru_cache *lc)
  221. {
  222. struct list_head *n;
  223. struct lc_element *e;
  224. if (list_empty(&lc->lru))
  225. return NULL;
  226. n = lc->lru.prev;
  227. e = list_entry(n, struct lc_element, list);
  228. PARANOIA_LC_ELEMENT(lc, e);
  229. list_del(&e->list);
  230. hlist_del(&e->colision);
  231. return e;
  232. }
/**
 * lc_del - removes an element from the cache
 * @lc: The lru_cache object
 * @e: The element to remove
 *
 * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
 * sets @e->enr to %LC_FREE.
 */
void lc_del(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt);	/* must not be in use by anyone */
	e->lc_number = LC_FREE;
	hlist_del_init(&e->colision);	/* no longer findable by lc_find() */
	list_move(&e->list, &lc->free);	/* available for recycling again */
	RETURN();
}
  251. static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
  252. {
  253. struct list_head *n;
  254. if (list_empty(&lc->free))
  255. return lc_evict(lc);
  256. n = lc->free.next;
  257. list_del(n);
  258. return list_entry(n, struct lc_element, list);
  259. }
  260. static int lc_unused_element_available(struct lru_cache *lc)
  261. {
  262. if (!list_empty(&lc->free))
  263. return 1; /* something on the free list */
  264. if (!list_empty(&lc->lru))
  265. return 1; /* something to evict */
  266. return 0;
  267. }
/**
 * lc_get - get element by label, maybe change the active set
 * @lc: the lru cache to operate on
 * @enr: the label to look up
 *
 * Finds an element in the cache, increases its usage count,
 * "touches" and returns it.
 *
 * In case the requested number is not present, it needs to be added to the
 * cache. Therefore it is possible that an other element becomes evicted from
 * the cache. In either case, the user is notified so he is able to e.g. keep
 * a persistent log of the cache changes, and therefore the objects in use.
 *
 * Return values:
 * NULL
 * The cache was marked %LC_STARVING,
 * or the requested label was not in the active set
 * and a changing transaction is still pending (@lc was marked %LC_DIRTY).
 * Or no unused or free element could be recycled (@lc will be marked as
 * %LC_STARVING, blocking further lc_get() operations).
 *
 * pointer to the element with the REQUESTED element number.
 * In this case, it can be used right away
 *
 * pointer to an UNUSED element with some different element number,
 * where that different number may also be %LC_FREE.
 *
 * In this case, the cache is marked %LC_DIRTY (blocking further changes),
 * and the returned element pointer is removed from the lru list and
 * hash collision chains. The user now should do whatever housekeeping
 * is necessary.
 * Then he must call lc_changed(lc,element_pointer), to finish
 * the change.
 *
 * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
 * any cache set change.
 */
struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
{
	struct lc_element *e;

	PARANOIA_ENTRY();
	/* a previous lc_get() could not recycle anything; refuse new work
	 * until some refcnt drops to zero (lc_put clears the flag) */
	if (lc->flags & LC_STARVING) {
		++lc->starving;
		RETURN(NULL);
	}

	e = lc_find(lc, enr);
	if (e) {
		++lc->hits;
		/* first reference pins the element */
		if (e->refcnt++ == 0)
			lc->used++;
		list_move(&e->list, &lc->in_use); /* Not evictable... */
		RETURN(e);
	}

	++lc->misses;
	/* In case there is nothing available and we can not kick out
	 * the LRU element, we have to wait ...
	 */
	if (!lc_unused_element_available(lc)) {
		__set_bit(__LC_STARVING, &lc->flags);
		RETURN(NULL);
	}

	/* it was not present in the active set.
	 * we are going to recycle an unused (or even "free") element.
	 * user may need to commit a transaction to record that change.
	 * we serialize on flags & TF_DIRTY */
	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
		++lc->dirty;
		RETURN(NULL);
	}

	/* cannot fail: lc_unused_element_available() said so above */
	e = lc_get_unused_element(lc);
	BUG_ON(!e);

	clear_bit(__LC_STARVING, &lc->flags);
	BUG_ON(++e->refcnt != 1);
	lc->used++;

	/* record the pending transaction; the caller must complete it
	 * with lc_changed() before any further set change can happen */
	lc->changing_element = e;
	lc->new_number = enr;

	RETURN(e);
}
  346. /* similar to lc_get,
  347. * but only gets a new reference on an existing element.
  348. * you either get the requested element, or NULL.
  349. * will be consolidated into one function.
  350. */
  351. struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
  352. {
  353. struct lc_element *e;
  354. PARANOIA_ENTRY();
  355. if (lc->flags & LC_STARVING) {
  356. ++lc->starving;
  357. RETURN(NULL);
  358. }
  359. e = lc_find(lc, enr);
  360. if (e) {
  361. ++lc->hits;
  362. if (e->refcnt++ == 0)
  363. lc->used++;
  364. list_move(&e->list, &lc->in_use); /* Not evictable... */
  365. }
  366. RETURN(e);
  367. }
/**
 * lc_changed - tell @lc that the change has been recorded
 * @lc: the lru cache to operate on
 * @e: the element pending label change
 *
 * Completes the transaction opened by lc_get(): commits the new label,
 * links @e back into the in_use list and the hash table, and clears
 * %LC_DIRTY so the next set change may proceed.
 */
void lc_changed(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	BUG_ON(e != lc->changing_element);	/* must match the pending lc_get() */
	PARANOIA_LC_ELEMENT(lc, e);
	++lc->changed;
	e->lc_number = lc->new_number;	/* commit the new label */
	list_add(&e->list, &lc->in_use);
	/* make it findable under the new label */
	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
	lc->changing_element = NULL;
	lc->new_number = LC_FREE;
	clear_bit(__LC_DIRTY, &lc->flags);	/* transaction finished */
	smp_mb__after_clear_bit();
	RETURN();
}
/**
 * lc_put - give up refcnt of @e
 * @lc: the lru cache to operate on
 * @e: the element to put
 *
 * If refcnt reaches zero, the element is moved to the lru list,
 * and a %LC_STARVING (if set) is cleared.
 * Returns the new (post-decrement) refcnt.
 */
unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt == 0);		/* unbalanced put */
	BUG_ON(e == lc->changing_element);	/* pending lc_changed() first */
	if (--e->refcnt == 0) {
		/* move it to the front of LRU. */
		list_move(&e->list, &lc->lru);
		lc->used--;
		/* an element became evictable; unblock starved lc_get() */
		clear_bit(__LC_STARVING, &lc->flags);
		smp_mb__after_clear_bit();
	}
	RETURN(e->refcnt);
}
  412. /**
  413. * lc_element_by_index
  414. * @lc: the lru cache to operate on
  415. * @i: the index of the element to return
  416. */
  417. struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
  418. {
  419. BUG_ON(i >= lc->nr_elements);
  420. BUG_ON(lc->lc_element[i] == NULL);
  421. BUG_ON(lc->lc_element[i]->lc_index != i);
  422. return lc->lc_element[i];
  423. }
/**
 * lc_index_of
 * @lc: the lru cache to operate on
 * @e: the element to query for its index position in lc->element
 *
 * BUGs (via PARANOIA_LC_ELEMENT) if @e is not tracked by @lc.
 */
unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_LC_ELEMENT(lc, e);
	return e->lc_index;
}
  434. /**
  435. * lc_set - associate index with label
  436. * @lc: the lru cache to operate on
  437. * @enr: the label to set
  438. * @index: the element index to associate label with.
  439. *
  440. * Used to initialize the active set to some previously recorded state.
  441. */
  442. void lc_set(struct lru_cache *lc, unsigned int enr, int index)
  443. {
  444. struct lc_element *e;
  445. if (index < 0 || index >= lc->nr_elements)
  446. return;
  447. e = lc_element_by_index(lc, index);
  448. e->lc_number = enr;
  449. hlist_del_init(&e->colision);
  450. hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
  451. list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
  452. }
  453. /**
  454. * lc_dump - Dump a complete LRU cache to seq in textual form.
  455. * @lc: the lru cache to operate on
  456. * @seq: the &struct seq_file pointer to seq_printf into
  457. * @utext: user supplied "heading" or other info
  458. * @detail: function pointer the user may provide to dump further details
  459. * of the object the lc_element is embedded in.
  460. */
  461. void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
  462. void (*detail) (struct seq_file *, struct lc_element *))
  463. {
  464. unsigned int nr_elements = lc->nr_elements;
  465. struct lc_element *e;
  466. int i;
  467. seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
  468. for (i = 0; i < nr_elements; i++) {
  469. e = lc_element_by_index(lc, i);
  470. if (e->lc_number == LC_FREE) {
  471. seq_printf(seq, "\t%2d: FREE\n", i);
  472. } else {
  473. seq_printf(seq, "\t%2d: %4u %4u ", i,
  474. e->lc_number, e->refcnt);
  475. detail(seq, e);
  476. }
  477. }
  478. }
/* public interface of the lru_cache module, usable by other kernel code */
EXPORT_SYMBOL(lc_create);
EXPORT_SYMBOL(lc_reset);
EXPORT_SYMBOL(lc_destroy);
EXPORT_SYMBOL(lc_set);
EXPORT_SYMBOL(lc_del);
EXPORT_SYMBOL(lc_try_get);
EXPORT_SYMBOL(lc_find);
EXPORT_SYMBOL(lc_get);
EXPORT_SYMBOL(lc_put);
EXPORT_SYMBOL(lc_changed);
EXPORT_SYMBOL(lc_element_by_index);
EXPORT_SYMBOL(lc_index_of);
EXPORT_SYMBOL(lc_seq_printf_stats);
EXPORT_SYMBOL(lc_seq_dump_details);