xvmalloc.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. /*
  2. * xvmalloc memory allocator
  3. *
  4. * Copyright (C) 2008, 2009, 2010 Nitin Gupta
  5. *
  6. * This code is released using a dual license strategy: BSD/GPL
  7. * You can choose the licence that better fits your requirements.
  8. *
  9. * Released under the terms of 3-clause BSD License
  10. * Released under the terms of GNU General Public License Version 2.0
  11. */
  12. #ifdef CONFIG_ZRAM_DEBUG
  13. #define DEBUG
  14. #endif
  15. #include <linux/module.h>
  16. #include <linux/kernel.h>
  17. #include <linux/bitops.h>
  18. #include <linux/errno.h>
  19. #include <linux/highmem.h>
  20. #include <linux/init.h>
  21. #include <linux/string.h>
  22. #include <linux/slab.h>
  23. #include "xvmalloc.h"
  24. #include "xvmalloc_int.h"
  25. static void stat_inc(u64 *value)
  26. {
  27. *value = *value + 1;
  28. }
  29. static void stat_dec(u64 *value)
  30. {
  31. *value = *value - 1;
  32. }
  33. static int test_flag(struct block_header *block, enum blockflags flag)
  34. {
  35. return block->prev & BIT(flag);
  36. }
  37. static void set_flag(struct block_header *block, enum blockflags flag)
  38. {
  39. block->prev |= BIT(flag);
  40. }
  41. static void clear_flag(struct block_header *block, enum blockflags flag)
  42. {
  43. block->prev &= ~BIT(flag);
  44. }
  45. /*
  46. * Given <page, offset> pair, provide a dereferencable pointer.
  47. * This is called from xv_malloc/xv_free path, so it
  48. * needs to be fast.
  49. */
  50. static void *get_ptr_atomic(struct page *page, u16 offset, enum km_type type)
  51. {
  52. unsigned char *base;
  53. base = kmap_atomic(page, type);
  54. return base + offset;
  55. }
  56. static void put_ptr_atomic(void *ptr, enum km_type type)
  57. {
  58. kunmap_atomic(ptr, type);
  59. }
  60. static u32 get_blockprev(struct block_header *block)
  61. {
  62. return block->prev & PREV_MASK;
  63. }
  64. static void set_blockprev(struct block_header *block, u16 new_offset)
  65. {
  66. block->prev = new_offset | (block->prev & FLAGS_MASK);
  67. }
  68. static struct block_header *BLOCK_NEXT(struct block_header *block)
  69. {
  70. return (struct block_header *)
  71. ((char *)block + block->size + XV_ALIGN);
  72. }
  73. /*
  74. * Get index of free list containing blocks of maximum size
  75. * which is less than or equal to given size.
  76. */
  77. static u32 get_index_for_insert(u32 size)
  78. {
  79. if (unlikely(size > XV_MAX_ALLOC_SIZE))
  80. size = XV_MAX_ALLOC_SIZE;
  81. size &= ~FL_DELTA_MASK;
  82. return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
  83. }
  84. /*
  85. * Get index of free list having blocks of size greater than
  86. * or equal to requested size.
  87. */
  88. static u32 get_index(u32 size)
  89. {
  90. if (unlikely(size < XV_MIN_ALLOC_SIZE))
  91. size = XV_MIN_ALLOC_SIZE;
  92. size = ALIGN(size, FL_DELTA);
  93. return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
  94. }
  95. /**
  96. * find_block - find block of at least given size
  97. * @pool: memory pool to search from
  98. * @size: size of block required
  99. * @page: page containing required block
  100. * @offset: offset within the page where block is located.
  101. *
  102. * Searches two level bitmap to locate block of at least
  103. * the given size. If such a block is found, it provides
  104. * <page, offset> to identify this block and returns index
  105. * in freelist where we found this block.
  106. * Otherwise, returns 0 and <page, offset> params are not touched.
  107. */
  108. static u32 find_block(struct xv_pool *pool, u32 size,
  109. struct page **page, u32 *offset)
  110. {
  111. ulong flbitmap, slbitmap;
  112. u32 flindex, slindex, slbitstart;
  113. /* There are no free blocks in this pool */
  114. if (!pool->flbitmap)
  115. return 0;
  116. /* Get freelist index correspoding to this size */
  117. slindex = get_index(size);
  118. slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
  119. slbitstart = slindex % BITS_PER_LONG;
  120. /*
  121. * If freelist is not empty at this index, we found the
  122. * block - head of this list. This is approximate best-fit match.
  123. */
  124. if (test_bit(slbitstart, &slbitmap)) {
  125. *page = pool->freelist[slindex].page;
  126. *offset = pool->freelist[slindex].offset;
  127. return slindex;
  128. }
  129. /*
  130. * No best-fit found. Search a bit further in bitmap for a free block.
  131. * Second level bitmap consists of series of 32-bit chunks. Search
  132. * further in the chunk where we expected a best-fit, starting from
  133. * index location found above.
  134. */
  135. slbitstart++;
  136. slbitmap >>= slbitstart;
  137. /* Skip this search if we were already at end of this bitmap chunk */
  138. if ((slbitstart != BITS_PER_LONG) && slbitmap) {
  139. slindex += __ffs(slbitmap) + 1;
  140. *page = pool->freelist[slindex].page;
  141. *offset = pool->freelist[slindex].offset;
  142. return slindex;
  143. }
  144. /* Now do a full two-level bitmap search to find next nearest fit */
  145. flindex = slindex / BITS_PER_LONG;
  146. flbitmap = (pool->flbitmap) >> (flindex + 1);
  147. if (!flbitmap)
  148. return 0;
  149. flindex += __ffs(flbitmap) + 1;
  150. slbitmap = pool->slbitmap[flindex];
  151. slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
  152. *page = pool->freelist[slindex].page;
  153. *offset = pool->freelist[slindex].offset;
  154. return slindex;
  155. }
  156. /*
  157. * Insert block at <page, offset> in freelist of given pool.
  158. * freelist used depends on block size.
  159. */
  160. static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
  161. struct block_header *block)
  162. {
  163. u32 flindex, slindex;
  164. struct block_header *nextblock;
  165. slindex = get_index_for_insert(block->size);
  166. flindex = slindex / BITS_PER_LONG;
  167. block->link.prev_page = NULL;
  168. block->link.prev_offset = 0;
  169. block->link.next_page = pool->freelist[slindex].page;
  170. block->link.next_offset = pool->freelist[slindex].offset;
  171. pool->freelist[slindex].page = page;
  172. pool->freelist[slindex].offset = offset;
  173. if (block->link.next_page) {
  174. nextblock = get_ptr_atomic(block->link.next_page,
  175. block->link.next_offset, KM_USER1);
  176. nextblock->link.prev_page = page;
  177. nextblock->link.prev_offset = offset;
  178. put_ptr_atomic(nextblock, KM_USER1);
  179. /* If there was a next page then the free bits are set. */
  180. return;
  181. }
  182. __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
  183. __set_bit(flindex, &pool->flbitmap);
  184. }
  185. /*
  186. * Remove block from freelist. Index 'slindex' identifies the freelist.
  187. */
  188. static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
  189. struct block_header *block, u32 slindex)
  190. {
  191. u32 flindex = slindex / BITS_PER_LONG;
  192. struct block_header *tmpblock;
  193. if (block->link.prev_page) {
  194. tmpblock = get_ptr_atomic(block->link.prev_page,
  195. block->link.prev_offset, KM_USER1);
  196. tmpblock->link.next_page = block->link.next_page;
  197. tmpblock->link.next_offset = block->link.next_offset;
  198. put_ptr_atomic(tmpblock, KM_USER1);
  199. }
  200. if (block->link.next_page) {
  201. tmpblock = get_ptr_atomic(block->link.next_page,
  202. block->link.next_offset, KM_USER1);
  203. tmpblock->link.prev_page = block->link.prev_page;
  204. tmpblock->link.prev_offset = block->link.prev_offset;
  205. put_ptr_atomic(tmpblock, KM_USER1);
  206. }
  207. /* Is this block is at the head of the freelist? */
  208. if (pool->freelist[slindex].page == page
  209. && pool->freelist[slindex].offset == offset) {
  210. pool->freelist[slindex].page = block->link.next_page;
  211. pool->freelist[slindex].offset = block->link.next_offset;
  212. if (pool->freelist[slindex].page) {
  213. struct block_header *tmpblock;
  214. tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
  215. pool->freelist[slindex].offset,
  216. KM_USER1);
  217. tmpblock->link.prev_page = NULL;
  218. tmpblock->link.prev_offset = 0;
  219. put_ptr_atomic(tmpblock, KM_USER1);
  220. } else {
  221. /* This freelist bucket is empty */
  222. __clear_bit(slindex % BITS_PER_LONG,
  223. &pool->slbitmap[flindex]);
  224. if (!pool->slbitmap[flindex])
  225. __clear_bit(flindex, &pool->flbitmap);
  226. }
  227. }
  228. block->link.prev_page = NULL;
  229. block->link.prev_offset = 0;
  230. block->link.next_page = NULL;
  231. block->link.next_offset = 0;
  232. }
  233. /*
  234. * Allocate a page and add it to freelist of given pool.
  235. */
  236. static int grow_pool(struct xv_pool *pool, gfp_t flags)
  237. {
  238. struct page *page;
  239. struct block_header *block;
  240. page = alloc_page(flags);
  241. if (unlikely(!page))
  242. return -ENOMEM;
  243. stat_inc(&pool->total_pages);
  244. spin_lock(&pool->lock);
  245. block = get_ptr_atomic(page, 0, KM_USER0);
  246. block->size = PAGE_SIZE - XV_ALIGN;
  247. set_flag(block, BLOCK_FREE);
  248. clear_flag(block, PREV_FREE);
  249. set_blockprev(block, 0);
  250. insert_block(pool, page, 0, block);
  251. put_ptr_atomic(block, KM_USER0);
  252. spin_unlock(&pool->lock);
  253. return 0;
  254. }
  255. /*
  256. * Create a memory pool. Allocates freelist, bitmaps and other
  257. * per-pool metadata.
  258. */
  259. struct xv_pool *xv_create_pool(void)
  260. {
  261. u32 ovhd_size;
  262. struct xv_pool *pool;
  263. ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
  264. pool = kzalloc(ovhd_size, GFP_KERNEL);
  265. if (!pool)
  266. return NULL;
  267. spin_lock_init(&pool->lock);
  268. return pool;
  269. }
  270. EXPORT_SYMBOL_GPL(xv_create_pool);
  /*
   * Release the metadata of @pool. Only the pool structure itself is freed
   * here; pages still held by the pool are not walked or released.
   */
  void xv_destroy_pool(struct xv_pool *pool)
  {
      kfree(pool);
  }
  EXPORT_SYMBOL_GPL(xv_destroy_pool);
  276. /**
  277. * xv_malloc - Allocate block of given size from pool.
  278. * @pool: pool to allocate from
  279. * @size: size of block to allocate
  280. * @page: page no. that holds the object
  281. * @offset: location of object within page
  282. *
  283. * On success, <page, offset> identifies block allocated
  284. * and 0 is returned. On failure, <page, offset> is set to
  285. * 0 and -ENOMEM is returned.
  286. *
  287. * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
  288. */
  289. int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
  290. u32 *offset, gfp_t flags)
  291. {
  292. int error;
  293. u32 index, tmpsize, origsize, tmpoffset;
  294. struct block_header *block, *tmpblock;
  295. *page = NULL;
  296. *offset = 0;
  297. origsize = size;
  298. if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
  299. return -ENOMEM;
  300. size = ALIGN(size, XV_ALIGN);
  301. spin_lock(&pool->lock);
  302. index = find_block(pool, size, page, offset);
  303. if (!*page) {
  304. spin_unlock(&pool->lock);
  305. if (flags & GFP_NOWAIT)
  306. return -ENOMEM;
  307. error = grow_pool(pool, flags);
  308. if (unlikely(error))
  309. return error;
  310. spin_lock(&pool->lock);
  311. index = find_block(pool, size, page, offset);
  312. }
  313. if (!*page) {
  314. spin_unlock(&pool->lock);
  315. return -ENOMEM;
  316. }
  317. block = get_ptr_atomic(*page, *offset, KM_USER0);
  318. remove_block(pool, *page, *offset, block, index);
  319. /* Split the block if required */
  320. tmpoffset = *offset + size + XV_ALIGN;
  321. tmpsize = block->size - size;
  322. tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
  323. if (tmpsize) {
  324. tmpblock->size = tmpsize - XV_ALIGN;
  325. set_flag(tmpblock, BLOCK_FREE);
  326. clear_flag(tmpblock, PREV_FREE);
  327. set_blockprev(tmpblock, *offset);
  328. if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
  329. insert_block(pool, *page, tmpoffset, tmpblock);
  330. if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
  331. tmpblock = BLOCK_NEXT(tmpblock);
  332. set_blockprev(tmpblock, tmpoffset);
  333. }
  334. } else {
  335. /* This block is exact fit */
  336. if (tmpoffset != PAGE_SIZE)
  337. clear_flag(tmpblock, PREV_FREE);
  338. }
  339. block->size = origsize;
  340. clear_flag(block, BLOCK_FREE);
  341. put_ptr_atomic(block, KM_USER0);
  342. spin_unlock(&pool->lock);
  343. *offset += XV_ALIGN;
  344. return 0;
  345. }
  346. EXPORT_SYMBOL_GPL(xv_malloc);
  347. /*
  348. * Free block identified with <page, offset>
  349. */
  350. void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
  351. {
  352. void *page_start;
  353. struct block_header *block, *tmpblock;
  354. offset -= XV_ALIGN;
  355. spin_lock(&pool->lock);
  356. page_start = get_ptr_atomic(page, 0, KM_USER0);
  357. block = (struct block_header *)((char *)page_start + offset);
  358. /* Catch double free bugs */
  359. BUG_ON(test_flag(block, BLOCK_FREE));
  360. block->size = ALIGN(block->size, XV_ALIGN);
  361. tmpblock = BLOCK_NEXT(block);
  362. if (offset + block->size + XV_ALIGN == PAGE_SIZE)
  363. tmpblock = NULL;
  364. /* Merge next block if its free */
  365. if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
  366. /*
  367. * Blocks smaller than XV_MIN_ALLOC_SIZE
  368. * are not inserted in any free list.
  369. */
  370. if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
  371. remove_block(pool, page,
  372. offset + block->size + XV_ALIGN, tmpblock,
  373. get_index_for_insert(tmpblock->size));
  374. }
  375. block->size += tmpblock->size + XV_ALIGN;
  376. }
  377. /* Merge previous block if its free */
  378. if (test_flag(block, PREV_FREE)) {
  379. tmpblock = (struct block_header *)((char *)(page_start) +
  380. get_blockprev(block));
  381. offset = offset - tmpblock->size - XV_ALIGN;
  382. if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
  383. remove_block(pool, page, offset, tmpblock,
  384. get_index_for_insert(tmpblock->size));
  385. tmpblock->size += block->size + XV_ALIGN;
  386. block = tmpblock;
  387. }
  388. /* No used objects in this page. Free it. */
  389. if (block->size == PAGE_SIZE - XV_ALIGN) {
  390. put_ptr_atomic(page_start, KM_USER0);
  391. spin_unlock(&pool->lock);
  392. __free_page(page);
  393. stat_dec(&pool->total_pages);
  394. return;
  395. }
  396. set_flag(block, BLOCK_FREE);
  397. if (block->size >= XV_MIN_ALLOC_SIZE)
  398. insert_block(pool, page, offset, block);
  399. if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
  400. tmpblock = BLOCK_NEXT(block);
  401. set_flag(tmpblock, PREV_FREE);
  402. set_blockprev(tmpblock, offset);
  403. }
  404. put_ptr_atomic(page_start, KM_USER0);
  405. spin_unlock(&pool->lock);
  406. }
  407. EXPORT_SYMBOL_GPL(xv_free);
  408. u32 xv_get_object_size(void *obj)
  409. {
  410. struct block_header *blk;
  411. blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
  412. return blk->size;
  413. }
  414. EXPORT_SYMBOL_GPL(xv_get_object_size);
  415. /*
  416. * Returns total memory used by allocator (userdata + metadata)
  417. */
  418. u64 xv_get_total_size_bytes(struct xv_pool *pool)
  419. {
  420. return pool->total_pages << PAGE_SHIFT;
  421. }
  422. EXPORT_SYMBOL_GPL(xv_get_total_size_bytes);