- /*
- * linux/mm/zcache.c
- *
- * A cleancache backend for file page compression.
- * Concepts based on original zcache by Dan Magenheimer.
- * Copyright (C) 2013 Bob Liu <bob.liu@xxxxxxxxxx>
- *
- * With zcache, active file pages can be compressed in memory during page
- * reclaim. When their data is needed again, the read I/O is avoided. This
- * results in a significant performance gain under memory pressure for
- * systems with many file pages.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
- #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
- #include <linux/atomic.h>
- #include <linux/cleancache.h>
- #include <linux/cpu.h>
- #include <linux/crypto.h>
- #include <linux/page-flags.h>
- #include <linux/pagemap.h>
- #include <linux/highmem.h>
- #include <linux/mm_types.h>
- #include <linux/module.h>
- #include <linux/slab.h>
- #include <linux/spinlock.h>
- #include <linux/radix-tree.h>
- #include <linux/rbtree.h>
- #include <linux/types.h>
- #include <linux/zbud.h>
- /*
- * Enable/disable zcache (disabled by default)
- */
- static bool zcache_enabled __read_mostly;
- module_param_named(enabled, zcache_enabled, bool, 0);
- /*
- * Compressor to be used by zcache
- */
- #define ZCACHE_COMPRESSOR_DEFAULT "lzo"
- static char *zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
- module_param_named(compressor, zcache_compressor, charp, 0);
- /*
- * The maximum percentage of memory that the compressed pool can occupy.
- */
- static unsigned int zcache_max_pool_percent = 10;
- module_param_named(max_pool_percent, zcache_max_pool_percent, uint, 0644);
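- /*
- * If file pages fall below this percentage of total RAM, zcache starts
- * draining its compressed pool (see zcache_is_full() and zcache_shrink()).
- */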
- static unsigned int zcache_clear_percent = 4;
- module_param_named(clear_percent, zcache_clear_percent, uint, 0644);
- /*
- * zcache statistics
- */
- static u64 zcache_pool_limit_hit;
- static u64 zcache_dup_entry;
- static u64 zcache_zbud_alloc_fail;
- static u64 zcache_evict_zpages;
- static u64 zcache_evict_filepages;
- static u64 zcache_inactive_pages_refused;
- static u64 zcache_reclaim_fail;
- static u64 zcache_pool_shrink;
- static u64 zcache_pool_shrink_fail;
- static u64 zcache_pool_shrink_pages;
- static u64 zcache_store_failed;
- static atomic_t zcache_stored_pages = ATOMIC_INIT(0);
- static atomic_t zcache_stored_zero_pages = ATOMIC_INIT(0);
- #define GFP_ZCACHE \
- (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | \
- __GFP_NOMEMALLOC | __GFP_NO_KSWAPD | __GFP_ZERO)
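- /*
- * Allocations on the store path must not disturb ongoing reclaim: no
- * retries, no allocation-failure warnings, no use of emergency reserves
- * and no waking of kswapd; __GFP_ZERO returns zeroed memory.
- */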
- /*
- * Make sure this is different from radix tree
- * indirect ptr or exceptional entry.
- */
- #define ZERO_HANDLE ((void *)~(~0UL >> 1))
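- /*
- * ZERO_HANDLE has only the top bit set, so it is non-NULL and its low bits
- * are clear. It marks pages that were entirely zero and therefore were
- * stored without compressing or allocating from zbud.
- */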
- /*
- * Zcache receives pages for compression through the Cleancache API and is able
- * to evict pages from its own compressed pool on an LRU basis in the case that
- * the compressed pool is full.
- *
- * Zcache uses zbud to manage the compressed memory pool. An allocation in
- * zbud is not directly accessible by address. Rather, the allocation routine
- * returns a handle (zaddr), and that handle must be mapped before the
- * allocation can be accessed. The compressed memory pool grows on demand and
- * shrinks as compressed pages are freed.
- *
- * When a file page is passed from cleancache to zcache, zcache maintains a
- * mapping of the <filesystem_type, inode_number, page_index> to the zbud
- * address that references that compressed file page. This mapping is achieved
- * with a red-black tree per filesystem type, plus a radix tree per red-black
- * node.
- *
- * A zcache pool, indexed by pool_id, is created when a filesystem is mounted.
- * Each zcache pool has a red-black tree whose search key is the inode number
- * (rb_index). Each red-black tree node has a radix tree which uses
- * page->index (ra_index) as its index. Each radix tree slot points to the
- * zbud address together with some extra information (struct zcache_ra_handle).
- */
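- /*
- * For example, loading the page at page->index 7 of inode 42 on the
- * filesystem bound to pool_id 3 looks up zcache.pools[3], searches its
- * red-black tree for the node with rb_index == 42, and then looks up
- * ra_index == 7 in that node's radix tree to obtain the zbud handle of
- * the compressed data.
- */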
- #define MAX_ZCACHE_POOLS 32
- /*
- * One zcache_pool per (cleancache aware) filesystem mount instance
- */
- struct zcache_pool {
- struct rb_root rbtree;
- rwlock_t rb_lock; /* Protects rbtree */
- u64 size;
- struct zbud_pool *pool; /* Zbud pool used */
- };
- /*
- * Manage all zcache pools
- */
- struct _zcache {
- struct zcache_pool *pools[MAX_ZCACHE_POOLS];
- u32 num_pools; /* Current no. of zcache pools */
- spinlock_t pool_lock; /* Protects pools[] and num_pools */
- };
- static struct _zcache zcache;
- /*
- * Red-black tree node; each node has a page-index radix tree.
- * Indexed by inode number.
- */
- struct zcache_rbnode {
- struct rb_node rb_node;
- int rb_index;
- struct radix_tree_root ratree; /* Page radix tree per inode rbtree */
- spinlock_t ra_lock; /* Protects radix tree */
- struct kref refcount;
- };
- /*
- * Radix-tree leaf, indexed by page->index
- */
- struct zcache_ra_handle {
- int rb_index; /* Red-black tree index */
- int ra_index; /* Radix tree index */
- int zlen; /* Compressed page size */
- struct zcache_pool *zpool; /* For finding the zcache_pool during eviction */
- };
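- /*
- * Each zbud allocation made by zcache holds a struct zcache_ra_handle
- * immediately followed by zlen bytes of compressed page data; see
- * zcache_store_page() and zcache_load_page().
- */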
- u64 zcache_pages(void)
- {
- int i;
- u64 count = 0;
- for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
- count += zcache.pools[i]->size;
- return count;
- }
- static struct kmem_cache *zcache_rbnode_cache;
- static int zcache_rbnode_cache_create(void)
- {
- zcache_rbnode_cache = KMEM_CACHE(zcache_rbnode, 0);
- return zcache_rbnode_cache == NULL;
- }
- static void zcache_rbnode_cache_destroy(void)
- {
- kmem_cache_destroy(zcache_rbnode_cache);
- }
- static int zcache_shrink(struct shrinker *s, struct shrink_control *sc)
- {
- unsigned long active_file;
- unsigned long file;
- long file_gap;
- unsigned long freed = 0;
- unsigned long pool;
- static bool running;
- int i = 0;
- int retries;
- if (running)
- goto end;
- running = true;
- active_file = global_page_state(NR_ACTIVE_FILE);
- file = global_page_state(NR_FILE_PAGES);
- pool = zcache_pages();
- file_gap = pool - file;
- if ((file_gap >= 0) &&
- (totalram_pages * zcache_clear_percent / 100 > file)) {
- file_gap = pool;
- zcache_pool_shrink++;
- goto reclaim;
- }
- /*
- * Otherwise only reclaim from the compressed pool when zcache holds at
- * least as many pages as there are active file pages; the excess
- * (pool - active_file) is the reclaim target.
- */
- file_gap = pool - active_file;
- if (file_gap < 0)
- file_gap = 0;
- else
- zcache_pool_shrink++;
- reclaim:
- retries = file_gap;
- while ((file_gap > 0) && retries) {
- struct zcache_pool *zpool =
- zcache.pools[i++ % MAX_ZCACHE_POOLS];
- if (!zpool || !zpool->size)
- continue;
- if (zbud_reclaim_page(zpool->pool, 8)) {
- zcache_pool_shrink_fail++;
- retries--;
- continue;
- }
- freed++;
- file_gap--;
- }
- zcache_pool_shrink_pages += freed;
- for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
- zcache.pools[i]->size =
- zbud_get_pool_size(zcache.pools[i]->pool);
- running = false;
- end:
- return freed;
- }
- static struct shrinker zcache_shrinker = {
- .shrink = zcache_shrink,
- .seeks = DEFAULT_SEEKS * 16
- };
- /*
- * Compression functions
- * (The functions below are copied from zswap.)
- */
- static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms;
- enum comp_op {
- ZCACHE_COMPOP_COMPRESS,
- ZCACHE_COMPOP_DECOMPRESS
- };
- static int zcache_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen)
- {
- struct crypto_comp *tfm;
- int ret;
- tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
- switch (op) {
- case ZCACHE_COMPOP_COMPRESS:
- ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
- break;
- case ZCACHE_COMPOP_DECOMPRESS:
- ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
- break;
- default:
- ret = -EINVAL;
- }
- put_cpu();
- return ret;
- }
- static int __init zcache_comp_init(void)
- {
- if (!crypto_has_comp(zcache_compressor, 0, 0)) {
- pr_info("%s compressor not available\n", zcache_compressor);
- /* fall back to default compressor */
- zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
- if (!crypto_has_comp(zcache_compressor, 0, 0))
- /* can't even load the default compressor */
- return -ENODEV;
- }
- pr_info("using %s compressor\n", zcache_compressor);
- /* alloc percpu transforms */
- zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
- if (!zcache_comp_pcpu_tfms)
- return -ENOMEM;
- return 0;
- }
- static void zcache_comp_exit(void)
- {
- /* free percpu transforms */
- if (zcache_comp_pcpu_tfms)
- free_percpu(zcache_comp_pcpu_tfms);
- }
- /*
- * Per-cpu code
- * (The functions below are also copied from zswap.)
- */
- static DEFINE_PER_CPU(u8 *, zcache_dstmem);
- static int __zcache_cpu_notifier(unsigned long action, unsigned long cpu)
- {
- struct crypto_comp *tfm;
- u8 *dst;
- switch (action) {
- case CPU_UP_PREPARE:
- tfm = crypto_alloc_comp(zcache_compressor, 0, 0);
- if (IS_ERR(tfm)) {
- pr_err("can't allocate compressor transform\n");
- return NOTIFY_BAD;
- }
- *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
- dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
- if (!dst) {
- pr_err("can't allocate compressor buffer\n");
- crypto_free_comp(tfm);
- *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
- return NOTIFY_BAD;
- }
- per_cpu(zcache_dstmem, cpu) = dst;
- break;
- case CPU_DEAD:
- case CPU_UP_CANCELED:
- tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
- if (tfm) {
- crypto_free_comp(tfm);
- *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
- }
- dst = per_cpu(zcache_dstmem, cpu);
- kfree(dst);
- per_cpu(zcache_dstmem, cpu) = NULL;
- break;
- default:
- break;
- }
- return NOTIFY_OK;
- }
- static int zcache_cpu_notifier(struct notifier_block *nb,
- unsigned long action, void *pcpu)
- {
- unsigned long cpu = (unsigned long)pcpu;
- return __zcache_cpu_notifier(action, cpu);
- }
- static struct notifier_block zcache_cpu_notifier_block = {
- .notifier_call = zcache_cpu_notifier
- };
- static int zcache_cpu_init(void)
- {
- unsigned long cpu;
- get_online_cpus();
- for_each_online_cpu(cpu)
- if (__zcache_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
- goto cleanup;
- register_cpu_notifier(&zcache_cpu_notifier_block);
- put_online_cpus();
- return 0;
- cleanup:
- for_each_online_cpu(cpu)
- __zcache_cpu_notifier(CPU_UP_CANCELED, cpu);
- put_online_cpus();
- return -ENOMEM;
- }
- /*
- * Zcache helpers
- */
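- /*
- * With the defaults (max_pool_percent = 10, clear_percent = 4) on a system
- * with 1,000,000 pages of RAM, zcache is considered full once its pool
- * exceeds 100,000 pages or once fewer than 40,000 file pages remain.
- */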
- static bool zcache_is_full(void)
- {
- long file = global_page_state(NR_FILE_PAGES);
- return ((totalram_pages * zcache_max_pool_percent / 100 <
- zcache_pages()) ||
- (totalram_pages * zcache_clear_percent / 100 >
- file));
- }
- /*
- * The caller must hold zpool->rb_lock (at least the read lock).
- */
- static struct zcache_rbnode *zcache_find_rbnode(struct rb_root *rbtree,
- int index, struct rb_node **rb_parent, struct rb_node ***rb_link)
- {
- struct zcache_rbnode *entry;
- struct rb_node **__rb_link, *__rb_parent, *rb_prev;
- __rb_link = &rbtree->rb_node;
- rb_prev = __rb_parent = NULL;
- while (*__rb_link) {
- __rb_parent = *__rb_link;
- entry = rb_entry(__rb_parent, struct zcache_rbnode, rb_node);
- if (entry->rb_index > index)
- __rb_link = &__rb_parent->rb_left;
- else if (entry->rb_index < index) {
- rb_prev = __rb_parent;
- __rb_link = &__rb_parent->rb_right;
- } else
- return entry;
- }
- if (rb_parent)
- *rb_parent = __rb_parent;
- if (rb_link)
- *rb_link = __rb_link;
- return NULL;
- }
- static struct zcache_rbnode *zcache_find_get_rbnode(struct zcache_pool *zpool,
- int rb_index)
- {
- unsigned long flags;
- struct zcache_rbnode *rbnode;
- read_lock_irqsave(&zpool->rb_lock, flags);
- rbnode = zcache_find_rbnode(&zpool->rbtree, rb_index, NULL, NULL);
- if (rbnode)
- kref_get(&rbnode->refcount);
- read_unlock_irqrestore(&zpool->rb_lock, flags);
- return rbnode;
- }
- /*
- * kref_put callback for zcache_rbnode.
- *
- * The rbnode must have been isolated from rbtree already.
- */
- static void zcache_rbnode_release(struct kref *kref)
- {
- struct zcache_rbnode *rbnode;
- rbnode = container_of(kref, struct zcache_rbnode, refcount);
- BUG_ON(rbnode->ratree.rnode);
- kmem_cache_free(zcache_rbnode_cache, rbnode);
- }
- /*
- * Check whether the radix-tree of this rbnode is empty.
- * If that's true, then we can delete this zcache_rbnode from
- * zcache_pool->rbtree
- *
- * Caller must hold zcache_rbnode->ra_lock
- */
- static int zcache_rbnode_empty(struct zcache_rbnode *rbnode)
- {
- return rbnode->ratree.rnode == NULL;
- }
- /*
- * Remove zcache_rbnode from zpool->rbtree
- *
- * holded_rblock - whether the caller already holds zpool->rb_lock
- */
- static void zcache_rbnode_isolate(struct zcache_pool *zpool,
- struct zcache_rbnode *rbnode, bool holded_rblock)
- {
- unsigned long flags;
- if (!holded_rblock)
- write_lock_irqsave(&zpool->rb_lock, flags);
- /*
- * Someone may have taken a reference to this rbnode before we could
- * acquire the write lock above.
- * We only want to remove it from zpool->rbtree when the caller and the
- * corresponding ratree hold the only references to this rbnode.
- * The check below ensures that a racing zcache put will not end up adding
- * a page to an isolated node and thereby losing that memory.
- */
- if (atomic_read(&rbnode->refcount.refcount) == 2) {
- rb_erase(&rbnode->rb_node, &zpool->rbtree);
- RB_CLEAR_NODE(&rbnode->rb_node);
- kref_put(&rbnode->refcount, zcache_rbnode_release);
- }
- if (!holded_rblock)
- write_unlock_irqrestore(&zpool->rb_lock, flags);
- }
- /*
- * Store a zaddr allocated by zbud_alloc() in the rbtree-ratree hierarchy.
- */
- static int zcache_store_zaddr(struct zcache_pool *zpool,
- int ra_index, int rb_index, unsigned long zaddr)
- {
- unsigned long flags;
- struct zcache_rbnode *rbnode, *tmp;
- struct rb_node **link = NULL, *parent = NULL;
- int ret;
- void *dup_zaddr;
- rbnode = zcache_find_get_rbnode(zpool, rb_index);
- if (!rbnode) {
- /* alloc and init a new rbnode */
- rbnode = kmem_cache_alloc(zcache_rbnode_cache,
- GFP_ZCACHE);
- if (!rbnode)
- return -ENOMEM;
- INIT_RADIX_TREE(&rbnode->ratree, GFP_ATOMIC|__GFP_NOWARN);
- spin_lock_init(&rbnode->ra_lock);
- rbnode->rb_index = rb_index;
- kref_init(&rbnode->refcount);
- RB_CLEAR_NODE(&rbnode->rb_node);
- /* add that rbnode to rbtree */
- write_lock_irqsave(&zpool->rb_lock, flags);
- tmp = zcache_find_rbnode(&zpool->rbtree, rb_index,
- &parent, &link);
- if (tmp) {
- /* somebody else allocated new rbnode */
- kmem_cache_free(zcache_rbnode_cache, rbnode);
- rbnode = tmp;
- } else {
- rb_link_node(&rbnode->rb_node, parent, link);
- rb_insert_color(&rbnode->rb_node, &zpool->rbtree);
- }
- /* Inc the reference of this zcache_rbnode */
- kref_get(&rbnode->refcount);
- write_unlock_irqrestore(&zpool->rb_lock, flags);
- }
- /* We have successfully obtained a zcache_rbnode at this point */
- spin_lock_irqsave(&rbnode->ra_lock, flags);
- dup_zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
- if (unlikely(dup_zaddr)) {
- if (dup_zaddr == ZERO_HANDLE) {
- atomic_dec(&zcache_stored_zero_pages);
- } else {
- zbud_free(zpool->pool, (unsigned long)dup_zaddr);
- atomic_dec(&zcache_stored_pages);
- zpool->size = zbud_get_pool_size(zpool->pool);
- }
- zcache_dup_entry++;
- }
- /* Insert zcache_ra_handle to ratree */
- ret = radix_tree_insert(&rbnode->ratree, ra_index,
- (void *)zaddr);
- spin_unlock_irqrestore(&rbnode->ra_lock, flags);
- if (unlikely(ret)) {
- write_lock_irqsave(&zpool->rb_lock, flags);
- spin_lock(&rbnode->ra_lock);
- if (zcache_rbnode_empty(rbnode))
- zcache_rbnode_isolate(zpool, rbnode, 1);
- spin_unlock(&rbnode->ra_lock);
- write_unlock_irqrestore(&zpool->rb_lock, flags);
- }
- kref_put(&rbnode->refcount, zcache_rbnode_release);
- return ret;
- }
- /*
- * Load zaddr and delete it from radix tree.
- * If the radix tree of the corresponding rbnode is empty, delete the rbnode
- * from zpool->rbtree also.
- */
- static void *zcache_load_delete_zaddr(struct zcache_pool *zpool,
- int rb_index, int ra_index)
- {
- struct zcache_rbnode *rbnode;
- void *zaddr = NULL;
- unsigned long flags;
- rbnode = zcache_find_get_rbnode(zpool, rb_index);
- if (!rbnode)
- goto out;
- BUG_ON(rbnode->rb_index != rb_index);
- spin_lock_irqsave(&rbnode->ra_lock, flags);
- zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
- spin_unlock_irqrestore(&rbnode->ra_lock, flags);
- /* rb_lock and ra_lock must be taken again in the given sequence */
- write_lock_irqsave(&zpool->rb_lock, flags);
- spin_lock(&rbnode->ra_lock);
- if (zcache_rbnode_empty(rbnode))
- zcache_rbnode_isolate(zpool, rbnode, 1);
- spin_unlock(&rbnode->ra_lock);
- write_unlock_irqrestore(&zpool->rb_lock, flags);
- kref_put(&rbnode->refcount, zcache_rbnode_release);
- out:
- return zaddr;
- }
- static bool zero_page(struct page *page)
- {
- unsigned long *ptr = kmap_atomic(page);
- int i;
- bool ret = false;
- for (i = 0; i < PAGE_SIZE / sizeof(*ptr); i++) {
- if (ptr[i])
- goto out;
- }
- ret = true;
- out:
- kunmap_atomic(ptr);
- return ret;
- }
- static void zcache_store_page(int pool_id, struct cleancache_filekey key,
- pgoff_t index, struct page *page)
- {
- struct zcache_ra_handle *zhandle;
- u8 *zpage, *src, *dst;
- /* Address of zhandle + compressed data(zpage) */
- unsigned long zaddr = 0;
- unsigned int zlen = PAGE_SIZE;
- bool zero = false;
- int ret;
- struct zcache_pool *zpool = zcache.pools[pool_id];
- /*
- * Zcache would be ineffective if the compressed memory pool were full of
- * compressed inactive file pages, most of which would never be used
- * again.
- * So we refuse to compress pages that are not on the active file list.
- */
- if (!PageWasActive(page)) {
- zcache_inactive_pages_refused++;
- return;
- }
- zero = zero_page(page);
- if (zero)
- goto zero;
- if (zcache_is_full()) {
- zcache_pool_limit_hit++;
- if (zbud_reclaim_page(zpool->pool, 8)) {
- zcache_reclaim_fail++;
- return;
- }
- /*
- * Continue if a page frame was reclaimed successfully.
- */
- zcache_evict_filepages++;
- zpool->size = zbud_get_pool_size(zpool->pool);
- }
- /* compress */
- dst = get_cpu_var(zcache_dstmem);
- src = kmap_atomic(page);
- ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, src, PAGE_SIZE, dst,
- &zlen);
- kunmap_atomic(src);
- if (ret) {
- pr_err("zcache compress error ret %d\n", ret);
- put_cpu_var(zcache_dstmem);
- return;
- }
- /* store zcache handle together with compressed page data */
- ret = zbud_alloc(zpool->pool, zlen + sizeof(struct zcache_ra_handle),
- GFP_ZCACHE, &zaddr);
- if (ret) {
- zcache_zbud_alloc_fail++;
- put_cpu_var(zcache_dstmem);
- return;
- }
- zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool, zaddr);
- /* Compressed page data is stored right after the zcache_ra_handle */
- zpage = (u8 *)(zhandle + 1);
- memcpy(zpage, dst, zlen);
- zbud_unmap(zpool->pool, zaddr);
- put_cpu_var(zcache_dstmem);
- zero:
- if (zero)
- zaddr = (unsigned long)ZERO_HANDLE;
- /* store zcache handle */
- ret = zcache_store_zaddr(zpool, index, key.u.ino, zaddr);
- if (ret) {
- zcache_store_failed++;
- if (!zero)
- zbud_free(zpool->pool, zaddr);
- return;
- }
- /* update stats */
- if (zero) {
- atomic_inc(&zcache_stored_zero_pages);
- } else {
- zhandle->ra_index = index;
- zhandle->rb_index = key.u.ino;
- zhandle->zlen = zlen;
- zhandle->zpool = zpool;
- atomic_inc(&zcache_stored_pages);
- zpool->size = zbud_get_pool_size(zpool->pool);
- }
- return;
- }
- static int zcache_load_page(int pool_id, struct cleancache_filekey key,
- pgoff_t index, struct page *page)
- {
- int ret = 0;
- u8 *src, *dst;
- void *zaddr;
- unsigned int dlen = PAGE_SIZE;
- struct zcache_ra_handle *zhandle;
- struct zcache_pool *zpool = zcache.pools[pool_id];
- zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
- if (!zaddr)
- return -ENOENT;
- else if (zaddr == ZERO_HANDLE)
- goto map;
- zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool,
- (unsigned long)zaddr);
- /* Compressed page data is stored right after the zcache_ra_handle */
- src = (u8 *)(zhandle + 1);
- /* decompress */
- map:
- dst = kmap_atomic(page);
- if (zaddr != ZERO_HANDLE) {
- ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, src,
- zhandle->zlen, dst, &dlen);
- } else {
- memset(dst, 0, PAGE_SIZE);
- kunmap_atomic(dst);
- flush_dcache_page(page);
- atomic_dec(&zcache_stored_zero_pages);
- goto out;
- }
- kunmap_atomic(dst);
- zbud_unmap(zpool->pool, (unsigned long)zaddr);
- zbud_free(zpool->pool, (unsigned long)zaddr);
- BUG_ON(ret);
- BUG_ON(dlen != PAGE_SIZE);
- /* update stats */
- atomic_dec(&zcache_stored_pages);
- zpool->size = zbud_get_pool_size(zpool->pool);
- out:
- SetPageWasActive(page);
- return ret;
- }
- static void zcache_flush_page(int pool_id, struct cleancache_filekey key,
- pgoff_t index)
- {
- struct zcache_pool *zpool = zcache.pools[pool_id];
- void *zaddr = NULL;
- zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
- if (zaddr && (zaddr != ZERO_HANDLE)) {
- zbud_free(zpool->pool, (unsigned long)zaddr);
- atomic_dec(&zcache_stored_pages);
- zpool->size = zbud_get_pool_size(zpool->pool);
- } else if (zaddr == ZERO_HANDLE) {
- atomic_dec(&zcache_stored_zero_pages);
- }
- }
- #define FREE_BATCH 16
- /*
- * The caller must hold rbnode->ra_lock.
- */
- static void zcache_flush_ratree(struct zcache_pool *zpool,
- struct zcache_rbnode *rbnode)
- {
- unsigned long index = 0;
- int count, i;
- struct zcache_ra_handle *zhandle;
- void *zaddr = NULL;
- do {
- void *zaddrs[FREE_BATCH];
- unsigned long indices[FREE_BATCH];
- count = radix_tree_gang_lookup_index(&rbnode->ratree,
- (void **)zaddrs, indices,
- index, FREE_BATCH);
- for (i = 0; i < count; i++) {
- if (zaddrs[i] == ZERO_HANDLE) {
- zaddr = radix_tree_delete(&rbnode->ratree,
- indices[i]);
- if (zaddr)
- atomic_dec(&zcache_stored_zero_pages);
- continue;
- }
- zhandle = (struct zcache_ra_handle *)zbud_map(
- zpool->pool, (unsigned long)zaddrs[i]);
- index = zhandle->ra_index;
- zaddr = radix_tree_delete(&rbnode->ratree, index);
- if (!zaddr)
- continue;
- zbud_unmap(zpool->pool, (unsigned long)zaddrs[i]);
- zbud_free(zpool->pool, (unsigned long)zaddrs[i]);
- atomic_dec(&zcache_stored_pages);
- zpool->size = zbud_get_pool_size(zpool->pool);
- }
- index++;
- } while (count == FREE_BATCH);
- }
- static void zcache_flush_inode(int pool_id, struct cleancache_filekey key)
- {
- struct zcache_rbnode *rbnode;
- unsigned long flags1, flags2;
- struct zcache_pool *zpool = zcache.pools[pool_id];
- /*
- * Refuse new pages being added to the same rbnode, so take rb_lock
- * first.
- */
- write_lock_irqsave(&zpool->rb_lock, flags1);
- rbnode = zcache_find_rbnode(&zpool->rbtree, key.u.ino, NULL, NULL);
- if (!rbnode) {
- write_unlock_irqrestore(&zpool->rb_lock, flags1);
- return;
- }
- kref_get(&rbnode->refcount);
- spin_lock_irqsave(&rbnode->ra_lock, flags2);
- zcache_flush_ratree(zpool, rbnode);
- if (zcache_rbnode_empty(rbnode))
- /* We already hold rb_lock at this point */
- zcache_rbnode_isolate(zpool, rbnode, 1);
- spin_unlock_irqrestore(&rbnode->ra_lock, flags2);
- write_unlock_irqrestore(&zpool->rb_lock, flags1);
- kref_put(&rbnode->refcount, zcache_rbnode_release);
- }
- static void zcache_destroy_pool(struct zcache_pool *zpool);
- static void zcache_flush_fs(int pool_id)
- {
- struct zcache_rbnode *z_rbnode = NULL;
- struct rb_node *rbnode;
- unsigned long flags1, flags2;
- struct zcache_pool *zpool;
- if (pool_id < 0)
- return;
- zpool = zcache.pools[pool_id];
- if (!zpool)
- return;
- /*
- * Refuse new pages being added, so take rb_lock first.
- */
- write_lock_irqsave(&zpool->rb_lock, flags1);
- rbnode = rb_first(&zpool->rbtree);
- while (rbnode) {
- z_rbnode = rb_entry(rbnode, struct zcache_rbnode, rb_node);
- rbnode = rb_next(rbnode);
- if (z_rbnode) {
- kref_get(&z_rbnode->refcount);
- spin_lock_irqsave(&z_rbnode->ra_lock, flags2);
- zcache_flush_ratree(zpool, z_rbnode);
- if (zcache_rbnode_empty(z_rbnode))
- zcache_rbnode_isolate(zpool, z_rbnode, 1);
- spin_unlock_irqrestore(&z_rbnode->ra_lock, flags2);
- kref_put(&z_rbnode->refcount, zcache_rbnode_release);
- }
- }
- write_unlock_irqrestore(&zpool->rb_lock, flags1);
- zcache_destroy_pool(zpool);
- }
- /*
- * Evict compressed pages from the zcache pool on an LRU basis when the
- * compressed pool is full.
- */
- static int zcache_evict_zpage(struct zbud_pool *pool, unsigned long zaddr)
- {
- struct zcache_pool *zpool;
- struct zcache_ra_handle *zhandle;
- void *zaddr_intree;
- BUG_ON(zaddr == (unsigned long)ZERO_HANDLE);
- zhandle = (struct zcache_ra_handle *)zbud_map(pool, zaddr);
- zpool = zhandle->zpool;
- /* There can be a race with zcache store */
- if (!zpool)
- return -EINVAL;
- BUG_ON(pool != zpool->pool);
- zaddr_intree = zcache_load_delete_zaddr(zpool, zhandle->rb_index,
- zhandle->ra_index);
- if (zaddr_intree) {
- BUG_ON((unsigned long)zaddr_intree != zaddr);
- zbud_unmap(pool, zaddr);
- zbud_free(pool, zaddr);
- atomic_dec(&zcache_stored_pages);
- zpool->size = zbud_get_pool_size(pool);
- zcache_evict_zpages++;
- }
- return 0;
- }
- static struct zbud_ops zcache_zbud_ops = {
- .evict = zcache_evict_zpage
- };
- /* Returns the new pool id, or a negative errno on failure */
- static int zcache_create_pool(void)
- {
- int ret;
- struct zcache_pool *zpool;
- zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
- if (!zpool) {
- ret = -ENOMEM;
- goto out;
- }
- zpool->pool = zbud_create_pool(GFP_KERNEL, &zcache_zbud_ops);
- if (!zpool->pool) {
- kfree(zpool);
- ret = -ENOMEM;
- goto out;
- }
- spin_lock(&zcache.pool_lock);
- if (zcache.num_pools == MAX_ZCACHE_POOLS) {
- pr_err("Cannot create new pool (limit:%u)\n", MAX_ZCACHE_POOLS);
- zbud_destroy_pool(zpool->pool);
- kfree(zpool);
- ret = -EPERM;
- goto out_unlock;
- }
- rwlock_init(&zpool->rb_lock);
- zpool->rbtree = RB_ROOT;
- /* Add to pool list */
- for (ret = 0; ret < MAX_ZCACHE_POOLS; ret++)
- if (!zcache.pools[ret])
- break;
- zcache.pools[ret] = zpool;
- zcache.num_pools++;
- pr_info("New pool created id:%d\n", ret);
- out_unlock:
- spin_unlock(&zcache.pool_lock);
- out:
- return ret;
- }
- static void zcache_destroy_pool(struct zcache_pool *zpool)
- {
- int i;
- if (!zpool)
- return;
- spin_lock(&zcache.pool_lock);
- zcache.num_pools--;
- for (i = 0; i < MAX_ZCACHE_POOLS; i++)
- if (zcache.pools[i] == zpool)
- break;
- zcache.pools[i] = NULL;
- spin_unlock(&zcache.pool_lock);
- if (!RB_EMPTY_ROOT(&zpool->rbtree))
- WARN_ON("Memory leak detected. Freeing non-empty pool!\n");
- zbud_destroy_pool(zpool->pool);
- kfree(zpool);
- }
- static int zcache_init_fs(size_t pagesize)
- {
- int ret;
- if (pagesize != PAGE_SIZE) {
- pr_info("Unsupported page size: %zu", pagesize);
- ret = -EINVAL;
- goto out;
- }
- ret = zcache_create_pool();
- if (ret < 0) {
- pr_info("Failed to create new pool\n");
- ret = -ENOMEM;
- goto out;
- }
- out:
- return ret;
- }
- static int zcache_init_shared_fs(char *uuid, size_t pagesize)
- {
- /* shared pools are unsupported and map to private */
- return zcache_init_fs(pagesize);
- }
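- /*
- * Cleancache entry points: put_page is called when a clean file page is
- * reclaimed, get_page when a file page missing from the page cache is
- * about to be read, invalidate_page/invalidate_inode/invalidate_fs when
- * cached copies become stale, and init_fs/init_shared_fs when a
- * cleancache-aware filesystem is mounted.
- */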
- static struct cleancache_ops zcache_ops = {
- .put_page = zcache_store_page,
- .get_page = zcache_load_page,
- .invalidate_page = zcache_flush_page,
- .invalidate_inode = zcache_flush_inode,
- .invalidate_fs = zcache_flush_fs,
- .init_shared_fs = zcache_init_shared_fs,
- .init_fs = zcache_init_fs
- };
- /*
- * Debugfs functions
- */
- #ifdef CONFIG_DEBUG_FS
- #include <linux/debugfs.h>
- static int pool_pages_get(void *_data, u64 *val)
- {
- *val = zcache_pages();
- return 0;
- }
- DEFINE_SIMPLE_ATTRIBUTE(pool_page_fops, pool_pages_get, NULL, "%llu\n");
- static struct dentry *zcache_debugfs_root;
- static int __init zcache_debugfs_init(void)
- {
- if (!debugfs_initialized())
- return -ENODEV;
- zcache_debugfs_root = debugfs_create_dir("zcache", NULL);
- if (!zcache_debugfs_root)
- return -ENOMEM;
- debugfs_create_u64("pool_limit_hit", S_IRUGO, zcache_debugfs_root,
- &zcache_pool_limit_hit);
- debugfs_create_u64("reject_alloc_fail", S_IRUGO, zcache_debugfs_root,
- &zcache_zbud_alloc_fail);
- debugfs_create_u64("duplicate_entry", S_IRUGO, zcache_debugfs_root,
- &zcache_dup_entry);
- debugfs_create_file("pool_pages", S_IRUGO, zcache_debugfs_root, NULL,
- &pool_page_fops);
- debugfs_create_atomic_t("stored_pages", S_IRUGO, zcache_debugfs_root,
- &zcache_stored_pages);
- debugfs_create_atomic_t("stored_zero_pages", S_IRUGO,
- zcache_debugfs_root, &zcache_stored_zero_pages);
- debugfs_create_u64("evicted_zpages", S_IRUGO, zcache_debugfs_root,
- &zcache_evict_zpages);
- debugfs_create_u64("evicted_filepages", S_IRUGO, zcache_debugfs_root,
- &zcache_evict_filepages);
- debugfs_create_u64("reclaim_fail", S_IRUGO, zcache_debugfs_root,
- &zcache_reclaim_fail);
- debugfs_create_u64("inactive_pages_refused", S_IRUGO,
- zcache_debugfs_root, &zcache_inactive_pages_refused);
- debugfs_create_u64("pool_shrink_count", S_IRUGO,
- zcache_debugfs_root, &zcache_pool_shrink);
- debugfs_create_u64("pool_shrink_fail", S_IRUGO,
- zcache_debugfs_root, &zcache_pool_shrink_fail);
- debugfs_create_u64("pool_shrink_pages", S_IRUGO,
- zcache_debugfs_root, &zcache_pool_shrink_pages);
- debugfs_create_u64("store_fail", S_IRUGO,
- zcache_debugfs_root, &zcache_store_failed);
- return 0;
- }
- static void __exit zcache_debugfs_exit(void)
- {
- debugfs_remove_recursive(zcache_debugfs_root);
- }
- #else
- static int __init zcache_debugfs_init(void)
- {
- return 0;
- }
- static void __exit zcache_debugfs_exit(void)
- {
- }
- #endif
- /*
- * zcache init and exit
- */
- static int __init init_zcache(void)
- {
- if (!zcache_enabled)
- return 0;
- pr_info("loading zcache..\n");
- if (zcache_rbnode_cache_create()) {
- pr_err("entry cache creation failed\n");
- goto error;
- }
- if (zcache_comp_init()) {
- pr_err("compressor initialization failed\n");
- goto compfail;
- }
- if (zcache_cpu_init()) {
- pr_err("per-cpu initialization failed\n");
- goto pcpufail;
- }
- spin_lock_init(&zcache.pool_lock);
- cleancache_register_ops(&zcache_ops);
- if (zcache_debugfs_init())
- pr_warn("debugfs initialization failed\n");
- register_shrinker(&zcache_shrinker);
- return 0;
- pcpufail:
- zcache_comp_exit();
- compfail:
- zcache_rbnode_cache_destroy();
- error:
- return -ENOMEM;
- }
- /* must be late so crypto has time to come up */
- late_initcall(init_zcache);
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("Bob Liu <bob.liu@xxxxxxxxxx>");
- MODULE_DESCRIPTION("Compressed cache for clean file pages");
|