123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528 |
- /*
- * zbud.c
- *
- * Copyright (C) 2013, Seth Jennings, IBM
- *
- * Concepts based on zcache internal zbud allocator by Dan Magenheimer.
- *
- * zbud is a special purpose allocator for storing compressed pages. Contrary
- * to what its name may suggest, zbud is not a buddy allocator, but rather an
- * allocator that "buddies" two compressed pages together in a single memory
- * page.
- *
- * While this design limits storage density, it has simple and deterministic
- * reclaim properties that make it preferable to a higher density approach when
- * reclaim will be used.
- *
- * zbud works by storing compressed pages, or "zpages", together in pairs in a
- * single memory page called a "zbud page". The first buddy is "left
- * justifed" at the beginning of the zbud page, and the last buddy is "right
- * justified" at the end of the zbud page. The benefit is that if either
- * buddy is freed, the freed buddy space, coalesced with whatever slack space
- * that existed between the buddies, results in the largest possible free region
- * within the zbud page.
- *
- * zbud also provides an attractive lower bound on density. The ratio of zpages
- * to zbud pages can not be less than 1. This ensures that zbud can never "do
- * harm" by using more pages to store zpages than the uncompressed zpages would
- * have used on their own.
- *
- * zbud pages are divided into "chunks". The size of the chunks is fixed at
- * compile time and determined by NCHUNKS_ORDER below. Dividing zbud pages
- * into chunks allows organizing unbuddied zbud pages into a manageable number
- * of unbuddied lists according to the number of free chunks available in the
- * zbud page.
- *
- * The zbud API differs from that of conventional allocators in that the
- * allocation function, zbud_alloc(), returns an opaque handle to the user,
- * not a dereferenceable pointer. The user must map the handle using
- * zbud_map() in order to get a usable pointer by which to access the
- * allocation data and unmap the handle with zbud_unmap() when operations
- * on the allocation data are complete.
- */
- #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
- #include <linux/atomic.h>
- #include <linux/list.h>
- #include <linux/mm.h>
- #include <linux/module.h>
- #include <linux/preempt.h>
- #include <linux/slab.h>
- #include <linux/spinlock.h>
- #include <linux/zbud.h>
- /*****************
- * Structures
- *****************/
/*
 * Chunk geometry.
 *
 * NCHUNKS_ORDER sets the internal allocation granularity and thereby the
 * amount of internal fragmentation: a zbud page is split into
 * 2^NCHUNKS_ORDER chunks. It also fixes the number of freelists each pool
 * maintains. With NCHUNKS_ORDER == 6, chunks are PAGE_SIZE/64 bytes and
 * every pool has 64 freelists.
 */
#define NCHUNKS_ORDER		6

/* log2 of the chunk size in bytes */
#define CHUNK_SHIFT		(PAGE_SHIFT - NCHUNKS_ORDER)
/* Size of a single chunk in bytes */
#define CHUNK_SIZE		(1 << CHUNK_SHIFT)
/* Number of chunks in a page, including the chunk that holds the header */
#define NCHUNKS			(PAGE_SIZE >> CHUNK_SHIFT)
/* Space reserved at the start of every zbud page for struct zbud_header */
#define ZHDR_SIZE_ALIGNED	CHUNK_SIZE
- /**
- * struct zbud_pool - stores metadata for each zbud pool
- * @lock: protects all pool fields and first|last_chunk fields of any
- * zbud page in the pool
- * @unbuddied: array of lists tracking zbud pages that only contain one buddy;
- * the lists each zbud page is added to depends on the size of
- * its free region.
- * @buddied: list tracking the zbud pages that contain two buddies;
- * these zbud pages are full
- * @lru: list tracking the zbud pages in LRU order by most recently
- * added buddy.
- * @pages_nr: number of zbud pages in the pool.
- * @ops: pointer to a structure of user defined operations specified at
- * pool creation time.
- *
- * This structure is allocated at pool creation time and maintains metadata
- * pertaining to a particular zbud pool.
- */
- struct zbud_pool {
- spinlock_t lock;
- struct list_head unbuddied[NCHUNKS];
- struct list_head buddied;
- struct list_head lru;
- u64 pages_nr;
- struct zbud_ops *ops;
- };
- /*
- * struct zbud_header - zbud page metadata occupying the first chunk of each
- * zbud page.
- * @buddy: links the zbud page into the unbuddied/buddied lists in the pool
- * @lru: links the zbud page into the lru list in the pool
- * @first_chunks: the size of the first buddy in chunks, 0 if free
- * @last_chunks: the size of the last buddy in chunks, 0 if free
- */
- struct zbud_header {
- struct list_head buddy;
- struct list_head lru;
- unsigned int first_chunks;
- unsigned int last_chunks;
- bool under_reclaim;
- };
- /*****************
- * Helpers
- *****************/
/* Names the two allocation slots of a zbud page, purely for readability */
enum buddy {
	FIRST = 0,	/* slot placed right after the zbud header */
	LAST = 1	/* slot placed against the end of the page */
};
- /* Converts an allocation size in bytes to size in zbud chunks */
- static int size_to_chunks(int size)
- {
- return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
- }
/*
 * Walks the indices of the unbuddied lists from _begin up to, but not
 * including, NCHUNKS. _iter must be an integer lvalue.
 */
#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; ++(_iter))
- /* Initializes the zbud header of a newly allocated zbud page */
- static struct zbud_header *init_zbud_page(struct page *page)
- {
- struct zbud_header *zhdr = page_address(page);
- zhdr->first_chunks = 0;
- zhdr->last_chunks = 0;
- INIT_LIST_HEAD(&zhdr->buddy);
- INIT_LIST_HEAD(&zhdr->lru);
- zhdr->under_reclaim = 0;
- return zhdr;
- }
/* Frees the page whose first chunk holds the given zbud header */
static void free_zbud_page(struct zbud_header *zhdr)
{
	struct page *page = virt_to_page(zhdr);

	__free_page(page);
}
- /*
- * Encodes the handle of a particular buddy within a zbud page
- * Pool lock should be held as this function accesses first|last_chunks
- */
- static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
- {
- unsigned long handle;
- /*
- * For now, the encoded handle is actually just the pointer to the data
- * but this might not always be the case. A little information hiding.
- * Add CHUNK_SIZE to the handle if it is the first allocation to jump
- * over the zbud header in the first chunk.
- */
- handle = (unsigned long)zhdr;
- if (bud == FIRST)
- /* skip over zbud header */
- handle += ZHDR_SIZE_ALIGNED;
- else /* bud == LAST */
- handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
- return handle;
- }
- /* Returns the zbud page where a given handle is stored */
- static struct zbud_header *handle_to_zbud_header(unsigned long handle)
- {
- return (struct zbud_header *)(handle & PAGE_MASK);
- }
- /* Returns the number of free chunks in a zbud page */
- static int num_free_chunks(struct zbud_header *zhdr)
- {
- /*
- * Rather than branch for different situations, just use the fact that
- * free buddies have a length of zero to simplify everything. -1 at the
- * end for the zbud header.
- */
- return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks - 1;
- }
- /*****************
- * API Functions
- *****************/
- /**
- * zbud_create_pool() - create a new zbud pool
- * @gfp: gfp flags when allocating the zbud pool structure
- * @ops: user-defined operations for the zbud pool
- *
- * Return: pointer to the new zbud pool or NULL if the metadata allocation
- * failed.
- */
- struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops)
- {
- struct zbud_pool *pool;
- int i;
- pool = kmalloc(sizeof(struct zbud_pool), gfp);
- if (!pool)
- return NULL;
- spin_lock_init(&pool->lock);
- for_each_unbuddied_list(i, 0)
- INIT_LIST_HEAD(&pool->unbuddied[i]);
- INIT_LIST_HEAD(&pool->buddied);
- INIT_LIST_HEAD(&pool->lru);
- pool->pages_nr = 0;
- pool->ops = ops;
- return pool;
- }
/**
 * zbud_destroy_pool() - destroy an existing zbud pool
 * @pool:	the zbud pool to be destroyed
 *
 * Only the pool structure is released here; zbud pages still in the pool are
 * not touched, so the pool should be emptied before this function is called.
 */
void zbud_destroy_pool(struct zbud_pool *pool)
{
	kfree(pool);
}
- /**
- * zbud_alloc() - allocates a region of a given size
- * @pool: zbud pool from which to allocate
- * @size: size in bytes of the desired allocation
- * @gfp: gfp flags used if the pool needs to grow
- * @handle: handle of the new allocation
- *
- * This function will attempt to find a free region in the pool large enough to
- * satisfy the allocation request. A search of the unbuddied lists is
- * performed first. If no suitable free region is found, then a new page is
- * allocated and added to the pool to satisfy the request.
- *
- * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
- * as zbud pool pages.
- *
- * Return: 0 if success and handle is set, otherwise -EINVAL is the size or
- * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
- * a new page.
- */
- int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
- unsigned long *handle)
- {
- int chunks, i, freechunks;
- struct zbud_header *zhdr = NULL;
- enum buddy bud;
- struct page *page;
- if (size <= 0 || gfp & __GFP_HIGHMEM)
- return -EINVAL;
- if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED)
- return -ENOSPC;
- chunks = size_to_chunks(size);
- spin_lock(&pool->lock);
- /* First, try to find an unbuddied zbud page. */
- zhdr = NULL;
- for_each_unbuddied_list(i, chunks) {
- if (!list_empty(&pool->unbuddied[i])) {
- zhdr = list_first_entry(&pool->unbuddied[i],
- struct zbud_header, buddy);
- list_del(&zhdr->buddy);
- if (zhdr->first_chunks == 0)
- bud = FIRST;
- else
- bud = LAST;
- goto found;
- }
- }
- /* Couldn't find unbuddied zbud page, create new one */
- spin_unlock(&pool->lock);
- page = alloc_page(gfp);
- if (!page)
- return -ENOMEM;
- spin_lock(&pool->lock);
- pool->pages_nr++;
- zhdr = init_zbud_page(page);
- bud = FIRST;
- found:
- if (bud == FIRST)
- zhdr->first_chunks = chunks;
- else
- zhdr->last_chunks = chunks;
- if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) {
- /* Add to unbuddied list */
- freechunks = num_free_chunks(zhdr);
- list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
- } else {
- /* Add to buddied list */
- list_add(&zhdr->buddy, &pool->buddied);
- }
- /* Add/move zbud page to beginning of LRU */
- if (!list_empty(&zhdr->lru))
- list_del(&zhdr->lru);
- list_add(&zhdr->lru, &pool->lru);
- *handle = encode_handle(zhdr, bud);
- spin_unlock(&pool->lock);
- return 0;
- }
- /**
- * zbud_free() - frees the allocation associated with the given handle
- * @pool: pool in which the allocation resided
- * @handle: handle associated with the allocation returned by zbud_alloc()
- *
- * In the case that the zbud page in which the allocation resides is under
- * reclaim, as indicated by the PG_reclaim flag being set, this function
- * only sets the first|last_chunks to 0. The page is actually freed
- * once both buddies are evicted (see zbud_reclaim_page() below).
- */
- void zbud_free(struct zbud_pool *pool, unsigned long handle)
- {
- struct zbud_header *zhdr;
- int freechunks;
- spin_lock(&pool->lock);
- zhdr = handle_to_zbud_header(handle);
- /* If first buddy, handle will be page aligned */
- if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
- zhdr->last_chunks = 0;
- else
- zhdr->first_chunks = 0;
- if (zhdr->under_reclaim) {
- /* zbud page is under reclaim, reclaim will free */
- spin_unlock(&pool->lock);
- return;
- }
- /* Remove from existing buddy list */
- list_del(&zhdr->buddy);
- if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
- /* zbud page is empty, free */
- list_del(&zhdr->lru);
- free_zbud_page(zhdr);
- pool->pages_nr--;
- } else {
- /* Add to unbuddied list */
- freechunks = num_free_chunks(zhdr);
- list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
- }
- spin_unlock(&pool->lock);
- }
/* Returns the last entry of a list; the list is expected to be non-empty */
#define list_tail_entry(head, type, member) \
	list_entry((head)->prev, type, member)
- /**
- * zbud_reclaim_page() - evicts allocations from a pool page and frees it
- * @pool: pool from which a page will attempt to be evicted
- * @retires: number of pages on the LRU list for which eviction will
- * be attempted before failing
- *
- * zbud reclaim is different from normal system reclaim in that the reclaim is
- * done from the bottom, up. This is because only the bottom layer, zbud, has
- * information on how the allocations are organized within each zbud page. This
- * has the potential to create interesting locking situations between zbud and
- * the user, however.
- *
- * To avoid these, this is how zbud_reclaim_page() should be called:
- * The user detects a page should be reclaimed and calls zbud_reclaim_page().
- * zbud_reclaim_page() will remove a zbud page from the pool LRU list and call
- * the user-defined eviction handler with the pool and handle as arguments.
- *
- * If the handle can not be evicted, the eviction handler should return
- * non-zero. zbud_reclaim_page() will add the zbud page back to the
- * appropriate list and try the next zbud page on the LRU up to
- * a user defined number of retries.
- *
- * If the handle is successfully evicted, the eviction handler should
- * return 0 _and_ should have called zbud_free() on the handle. zbud_free()
- * contains logic to delay freeing the page if the page is under reclaim,
- * as indicated by the setting of the PG_reclaim flag on the underlying page.
- *
- * If all buddies in the zbud page are successfully evicted, then the
- * zbud page can be freed.
- *
- * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
- * no pages to evict or an eviction handler is not registered, -EAGAIN if
- * the retry limit was hit.
- */
- int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
- {
- int i, ret, freechunks;
- struct zbud_header *zhdr;
- unsigned long first_handle = 0, last_handle = 0;
- spin_lock(&pool->lock);
- if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
- retries == 0) {
- spin_unlock(&pool->lock);
- return -EINVAL;
- }
- for (i = 0; i < retries; i++) {
- zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru);
- list_del(&zhdr->lru);
- list_del(&zhdr->buddy);
- /* Protect zbud page against free */
- zhdr->under_reclaim = true;
- /*
- * We need encode the handles before unlocking, since we can
- * race with free that will set (first|last)_chunks to 0
- */
- first_handle = 0;
- last_handle = 0;
- if (zhdr->first_chunks)
- first_handle = encode_handle(zhdr, FIRST);
- if (zhdr->last_chunks)
- last_handle = encode_handle(zhdr, LAST);
- spin_unlock(&pool->lock);
- /* Issue the eviction callback(s) */
- if (first_handle) {
- ret = pool->ops->evict(pool, first_handle);
- if (ret)
- goto next;
- }
- if (last_handle) {
- ret = pool->ops->evict(pool, last_handle);
- if (ret)
- goto next;
- }
- next:
- spin_lock(&pool->lock);
- zhdr->under_reclaim = false;
- if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
- /*
- * Both buddies are now free, free the zbud page and
- * return success.
- */
- free_zbud_page(zhdr);
- pool->pages_nr--;
- spin_unlock(&pool->lock);
- return 0;
- } else if (zhdr->first_chunks == 0 ||
- zhdr->last_chunks == 0) {
- /* add to unbuddied list */
- freechunks = num_free_chunks(zhdr);
- list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
- } else {
- /* add to buddied list */
- list_add(&zhdr->buddy, &pool->buddied);
- }
- /* add to beginning of LRU */
- list_add(&zhdr->lru, &pool->lru);
- }
- spin_unlock(&pool->lock);
- return -EAGAIN;
- }
/**
 * zbud_map() - maps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides (unused)
 * @handle:	handle associated with the allocation to be mapped
 *
 * For zbud this is trivial: the handle value itself is returned as the
 * pointer. Other allocators implementing this allocation API could encode
 * more complex information in the handle and create temporary mappings to
 * make the data accessible to the user.
 *
 * Returns: a pointer to the mapped allocation
 */
void *zbud_map(struct zbud_pool *pool, unsigned long handle)
{
	void *addr = (void *)handle;

	return addr;
}
/**
 * zbud_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides (unused)
 * @handle:	handle associated with the allocation to be unmapped (unused)
 *
 * zbud_map() creates no mapping state, so this function does nothing.
 */
void zbud_unmap(struct zbud_pool *pool, unsigned long handle)
{
	/* nothing to undo */
}
- /**
- * zbud_get_pool_size() - gets the zbud pool size in pages
- * @pool: pool whose size is being queried
- *
- * Returns: size in pages of the given pool. The pool lock need not be
- * taken to access pages_nr.
- */
- u64 zbud_get_pool_size(struct zbud_pool *pool)
- {
- return pool->pages_nr;
- }
- static int __init init_zbud(void)
- {
- /* Make sure the zbud header will fit in one chunk */
- BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
- pr_info("loaded\n");
- return 0;
- }
- static void __exit exit_zbud(void)
- {
- pr_info("unloaded\n");
- }
- module_init(init_zbud);
- module_exit(exit_zbud);
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
- MODULE_DESCRIPTION("Buddy Allocator for Compressed Pages");
|