1/*
2 * linux/mm/zcache.c
3 *
4 * A cleancache backend for file pages compression.
5 * Concepts based on original zcache by Dan Magenheimer.
6 * Copyright (C) 2013 Bob Liu <bob.liu@xxxxxxxxxx>
7 *
 8 * With zcache, active file pages can be compressed in memory during page
 9 * reclaim. When their data is needed again, the read I/O is avoided. This
 10 * results in a significant performance gain under memory pressure for
 11 * systems with many file pages.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22*/
23
24#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
25
26#include <linux/atomic.h>
27#include <linux/cleancache.h>
28#include <linux/cpu.h>
29#include <linux/crypto.h>
30#include <linux/page-flags.h>
31#include <linux/pagemap.h>
32#include <linux/highmem.h>
33#include <linux/mm_types.h>
34#include <linux/module.h>
35#include <linux/slab.h>
36#include <linux/spinlock.h>
37#include <linux/radix-tree.h>
38#include <linux/rbtree.h>
39#include <linux/types.h>
40#include <linux/zbud.h>
41
42/*
43 * Enable/disable zcache (disabled by default)
44 */
45static bool zcache_enabled __read_mostly;
46module_param_named(enabled, zcache_enabled, bool, 0);
47
48/*
49 * Compressor to be used by zcache
50 */
51#define ZCACHE_COMPRESSOR_DEFAULT "lzo"
52static char *zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
53module_param_named(compressor, zcache_compressor, charp, 0);
54
55/*
56 * The maximum percentage of memory that the compressed pool can occupy.
57 */
58static unsigned int zcache_max_pool_percent = 10;
59module_param_named(max_pool_percent, zcache_max_pool_percent, uint, 0644);
60
61static unsigned int zcache_clear_percent = 4;
62module_param_named(clear_percent, zcache_clear_percent, uint, 0644);
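/*
 * Illustrative usage of the parameters above (an example, not the only
 * supported configuration): with zcache built in, they are typically set
 * on the kernel command line, e.g.
 *
 *	zcache.enabled=1 zcache.compressor=lz4 zcache.max_pool_percent=20
 *
 * lz4 is assumed to be available through the crypto API; otherwise zcache
 * falls back to the default compressor ("lzo").
 */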
63/*
64 * zcache statistics
65 */
66static u64 zcache_pool_limit_hit;
67static u64 zcache_dup_entry;
68static u64 zcache_zbud_alloc_fail;
69static u64 zcache_evict_zpages;
70static u64 zcache_evict_filepages;
71static u64 zcache_inactive_pages_refused;
72static u64 zcache_reclaim_fail;
73static u64 zcache_pool_shrink;
74static u64 zcache_pool_shrink_fail;
75static u64 zcache_pool_shrink_pages;
76static u64 zcache_store_failed;
77static atomic_t zcache_stored_pages = ATOMIC_INIT(0);
78static atomic_t zcache_stored_zero_pages = ATOMIC_INIT(0);
79
80#define GFP_ZCACHE \
81 (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | \
82 __GFP_NOMEMALLOC | __GFP_NO_KSWAPD | __GFP_ZERO)
83
84/*
 85 * Make sure this is different from a radix-tree
 86 * indirect pointer or an exceptional entry.
87 */
88#define ZERO_HANDLE ((void *)~(~0UL >> 1))
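/*
 * For reference: ~(~0UL >> 1) is the value with only the top bit set
 * (0x80000000UL on 32-bit, 0x8000000000000000UL on 64-bit builds), so it
 * is non-NULL and its low bits are clear, unlike the low-bit tags the
 * radix tree uses for indirect pointers and exceptional entries.
 */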
89
90/*
91 * Zcache receives pages for compression through the Cleancache API and is able
92 * to evict pages from its own compressed pool on an LRU basis in the case that
93 * the compressed pool is full.
94 *
 95 * Zcache makes use of zbud for managing the compressed memory pool. Each
 96 * allocation in zbud is not directly accessible by address. Rather, a handle
 97 * (zaddr) is returned by the allocation routine, and that handle (zaddr) must
 98 * be mapped before being accessed. The compressed memory pool grows on demand and
99 * shrinks as compressed pages are freed.
100 *
 101 * When a file page is passed from cleancache to zcache, zcache maintains a
 102 * mapping of the <pool_id, inode_number, page_index> tuple to the zbud address
 103 * that references that compressed file page. This mapping is achieved with a
 104 * red-black tree per (cleancache-enabled) filesystem mount, plus a radix tree
 105 * per red-black node.
106 *
 107 * A zcache pool, with pool_id as the index, is created when a filesystem is
 108 * mounted. Each zcache pool has a red-black tree in which the inode number
 109 * (rb_index) is the search key. Each red-black tree node has a radix tree
 110 * which uses page->index (ra_index) as the index. Each radix tree slot points
 111 * to the zbud address combined with some extra information (zcache_ra_handle).
112 */
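/*
 * A minimal sketch of the lookup path described above (illustrative only;
 * it ignores locking, reference counting and the ZERO_HANDLE case, and the
 * names inode_nr/page_index are placeholders for the rb_index and ra_index
 * keys; the real, locked version is zcache_load_delete_zaddr() below):
 *
 *	zpool   = zcache.pools[pool_id];
 *	rbnode  = zcache_find_rbnode(&zpool->rbtree, inode_nr, NULL, NULL);
 *	zaddr   = radix_tree_lookup(&rbnode->ratree, page_index);
 *	zhandle = zbud_map(zpool->pool, (unsigned long)zaddr);
 *	zpage   = (u8 *)(zhandle + 1);    compressed data follows the handle
 *	zbud_unmap(zpool->pool, (unsigned long)zaddr);
 *
 * A zero-filled page is stored as the ZERO_HANDLE sentinel instead of a
 * zbud allocation.
 */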
113#define MAX_ZCACHE_POOLS 32
114/*
115 * One zcache_pool per (cleancache aware) filesystem mount instance
116 */
117struct zcache_pool {
118 struct rb_root rbtree;
119 rwlock_t rb_lock; /* Protects rbtree */
120 u64 size;
121 struct zbud_pool *pool; /* Zbud pool used */
122};
123
124/*
125 * Manage all zcache pools
126 */
127struct _zcache {
128 struct zcache_pool *pools[MAX_ZCACHE_POOLS];
129 u32 num_pools; /* Current no. of zcache pools */
130 spinlock_t pool_lock; /* Protects pools[] and num_pools */
131};
132struct _zcache zcache;
133
134/*
 135 * Red-black tree node; each node has a page-index radix tree.
 136 * Indexed by inode number.
137 */
138struct zcache_rbnode {
139 struct rb_node rb_node;
140 int rb_index;
141 struct radix_tree_root ratree; /* Page radix tree per inode rbtree */
142 spinlock_t ra_lock; /* Protects radix tree */
143 struct kref refcount;
144};
145
146/*
147 * Radix-tree leaf, indexed by page->index
148 */
149struct zcache_ra_handle {
150 int rb_index; /* Redblack tree index */
151 int ra_index; /* Radix tree index */
152 int zlen; /* Compressed page size */
153 struct zcache_pool *zpool; /* Finding zcache_pool during evict */
154};
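/*
 * Layout sketch of one zbud allocation as used by the store path below
 * (illustrative, not a separate definition): zbud_alloc() is asked for
 * zlen + sizeof(struct zcache_ra_handle) bytes, the handle sits at the
 * start of the mapped buffer and the compressed data follows it:
 *
 *	+---------------------------+------------------------------+
 *	| struct zcache_ra_handle   | zlen bytes of compressed     |
 *	| (rb_index, ra_index,      | page data, reached via       |
 *	|  zlen, zpool)             | (u8 *)(zhandle + 1)          |
 *	+---------------------------+------------------------------+
 */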
155
156u64 zcache_pages(void)
157{
158 int i;
159 u64 count = 0;
160
161 for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
162 count += zcache.pools[i]->size;
163
164 return count;
165}
166
167static struct kmem_cache *zcache_rbnode_cache;
168static int zcache_rbnode_cache_create(void)
169{
170 zcache_rbnode_cache = KMEM_CACHE(zcache_rbnode, 0);
171 return zcache_rbnode_cache == NULL;
172}
173static void zcache_rbnode_cache_destroy(void)
174{
175 kmem_cache_destroy(zcache_rbnode_cache);
176}
177
178static int zcache_shrink(struct shrinker *s, struct shrink_control *sc)
179{
180 unsigned long active_file;
181 unsigned long file;
182 long file_gap;
183 unsigned long freed = 0;
184 unsigned long pool;
185 static bool running;
186 int i = 0;
187 int retries;
188
189 if (running)
190 goto end;
191
192 running = true;
193 active_file = global_page_state(NR_ACTIVE_FILE);
194 file = global_page_state(NR_FILE_PAGES);
195 pool = zcache_pages();
196
197 file_gap = pool - file;
198
199 if ((file_gap >= 0) &&
200 (totalram_pages * zcache_clear_percent / 100 > file)) {
201 file_gap = pool;
202 zcache_pool_shrink++;
203 goto reclaim;
204 }
205
206 /*
207 * Shrink the pool down towards the number of active
208 * file pages: if zcache holds no more pages than there
209 * are active file pages, there is nothing to reclaim.
210 */
211 file_gap = pool - active_file;
212 if (file_gap < 0)
213 file_gap = 0;
214 else
215 zcache_pool_shrink++;
216
217reclaim:
218 retries = file_gap;
219 while ((file_gap > 0) && retries) {
220 struct zcache_pool *zpool =
221 zcache.pools[i++ % MAX_ZCACHE_POOLS];
222 if (!zpool || !zpool->size)
223 continue;
224 if (zbud_reclaim_page(zpool->pool, 8)) {
225 zcache_pool_shrink_fail++;
226 retries--;
227 continue;
228 }
229 freed++;
230 file_gap--;
231 }
232
233 zcache_pool_shrink_pages += freed;
234 for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
235 zcache.pools[i]->size =
236 zbud_get_pool_size(zcache.pools[i]->pool);
237
238 running = false;
239end:
240 return freed;
241}
242
243static struct shrinker zcache_shrinker = {
244 .shrink = zcache_shrink,
245 .seeks = DEFAULT_SEEKS * 16
246};
247
248/*
249 * Compression functions
 250 * (The functions below are copied from zswap.)
251 */
252static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms;
253
254enum comp_op {
255 ZCACHE_COMPOP_COMPRESS,
256 ZCACHE_COMPOP_DECOMPRESS
257};
258
259static int zcache_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
260 u8 *dst, unsigned int *dlen)
261{
262 struct crypto_comp *tfm;
263 int ret;
264
265 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
266 switch (op) {
267 case ZCACHE_COMPOP_COMPRESS:
268 ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
269 break;
270 case ZCACHE_COMPOP_DECOMPRESS:
271 ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
272 break;
273 default:
274 ret = -EINVAL;
275 }
276
277 put_cpu();
278 return ret;
279}
280
281static int __init zcache_comp_init(void)
282{
283 if (!crypto_has_comp(zcache_compressor, 0, 0)) {
284 pr_info("%s compressor not available\n", zcache_compressor);
285 /* fall back to default compressor */
286 zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
287 if (!crypto_has_comp(zcache_compressor, 0, 0))
288 /* can't even load the default compressor */
289 return -ENODEV;
290 }
291 pr_info("using %s compressor\n", zcache_compressor);
292
293 /* alloc percpu transforms */
294 zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
295 if (!zcache_comp_pcpu_tfms)
296 return -ENOMEM;
297 return 0;
298}
299
300static void zcache_comp_exit(void)
301{
302 /* free percpu transforms */
303 if (zcache_comp_pcpu_tfms)
304 free_percpu(zcache_comp_pcpu_tfms);
305}
306
307/*
308 * Per-cpu code
 309 * (The functions below are also copied from zswap.)
310 */
311static DEFINE_PER_CPU(u8 *, zcache_dstmem);
312
313static int __zcache_cpu_notifier(unsigned long action, unsigned long cpu)
314{
315 struct crypto_comp *tfm;
316 u8 *dst;
317
318 switch (action) {
319 case CPU_UP_PREPARE:
320 tfm = crypto_alloc_comp(zcache_compressor, 0, 0);
321 if (IS_ERR(tfm)) {
322 pr_err("can't allocate compressor transform\n");
323 return NOTIFY_BAD;
324 }
325 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
326 dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
327 if (!dst) {
328 pr_err("can't allocate compressor buffer\n");
329 crypto_free_comp(tfm);
330 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
331 return NOTIFY_BAD;
332 }
333 per_cpu(zcache_dstmem, cpu) = dst;
334 break;
335 case CPU_DEAD:
336 case CPU_UP_CANCELED:
337 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
338 if (tfm) {
339 crypto_free_comp(tfm);
340 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
341 }
342 dst = per_cpu(zcache_dstmem, cpu);
343 kfree(dst);
344 per_cpu(zcache_dstmem, cpu) = NULL;
345 break;
346 default:
347 break;
348 }
349 return NOTIFY_OK;
350}
351
352static int zcache_cpu_notifier(struct notifier_block *nb,
353 unsigned long action, void *pcpu)
354{
355 unsigned long cpu = (unsigned long)pcpu;
356
357 return __zcache_cpu_notifier(action, cpu);
358}
359
360static struct notifier_block zcache_cpu_notifier_block = {
361 .notifier_call = zcache_cpu_notifier
362};
363
364static int zcache_cpu_init(void)
365{
366 unsigned long cpu;
367
368 get_online_cpus();
369 for_each_online_cpu(cpu)
370 if (__zcache_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
371 goto cleanup;
372 register_cpu_notifier(&zcache_cpu_notifier_block);
373 put_online_cpus();
374 return 0;
375
376cleanup:
377 for_each_online_cpu(cpu)
378 __zcache_cpu_notifier(CPU_UP_CANCELED, cpu);
379 put_online_cpus();
380 return -ENOMEM;
381}
382
383/*
384 * Zcache helpers
385 */
386static bool zcache_is_full(void)
387{
388 long file = global_page_state(NR_FILE_PAGES);
389
390 return ((totalram_pages * zcache_max_pool_percent / 100 <
391 zcache_pages()) ||
392 (totalram_pages * zcache_clear_percent / 100 >
393 file));
394}
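/*
 * Worked example with the default tunables, assuming 4 KiB pages: on a
 * system with totalram_pages == 1 << 20 (4 GiB), max_pool_percent=10 marks
 * the cache full once the compressed pool exceeds ~104,857 page frames
 * (~410 MiB), and clear_percent=4 additionally reports "full" whenever
 * fewer than ~41,943 file pages remain, so zcache backs off when the page
 * cache itself is under pressure.
 */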
395
396/*
 397 * The caller must hold at least zpool->rb_lock (a read lock is sufficient)
398 */
399static struct zcache_rbnode *zcache_find_rbnode(struct rb_root *rbtree,
400 int index, struct rb_node **rb_parent, struct rb_node ***rb_link)
401{
402 struct zcache_rbnode *entry;
403 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
404
405 __rb_link = &rbtree->rb_node;
406 rb_prev = __rb_parent = NULL;
407
408 while (*__rb_link) {
409 __rb_parent = *__rb_link;
410 entry = rb_entry(__rb_parent, struct zcache_rbnode, rb_node);
411 if (entry->rb_index > index)
412 __rb_link = &__rb_parent->rb_left;
413 else if (entry->rb_index < index) {
414 rb_prev = __rb_parent;
415 __rb_link = &__rb_parent->rb_right;
416 } else
417 return entry;
418 }
419
420 if (rb_parent)
421 *rb_parent = __rb_parent;
422 if (rb_link)
423 *rb_link = __rb_link;
424 return NULL;
425}
426
427static struct zcache_rbnode *zcache_find_get_rbnode(struct zcache_pool *zpool,
428 int rb_index)
429{
430 unsigned long flags;
431 struct zcache_rbnode *rbnode;
432
433 read_lock_irqsave(&zpool->rb_lock, flags);
434 rbnode = zcache_find_rbnode(&zpool->rbtree, rb_index, 0, 0);
435 if (rbnode)
436 kref_get(&rbnode->refcount);
437 read_unlock_irqrestore(&zpool->rb_lock, flags);
438 return rbnode;
439}
440
441/*
442 * kref_put callback for zcache_rbnode.
443 *
444 * The rbnode must have been isolated from rbtree already.
445 */
446static void zcache_rbnode_release(struct kref *kref)
447{
448 struct zcache_rbnode *rbnode;
449
450 rbnode = container_of(kref, struct zcache_rbnode, refcount);
451 BUG_ON(rbnode->ratree.rnode);
452 kmem_cache_free(zcache_rbnode_cache, rbnode);
453}
454
455/*
456 * Check whether the radix-tree of this rbnode is empty.
457 * If that's true, then we can delete this zcache_rbnode from
458 * zcache_pool->rbtree
459 *
460 * Caller must hold zcache_rbnode->ra_lock
461 */
462static int zcache_rbnode_empty(struct zcache_rbnode *rbnode)
463{
464 return rbnode->ratree.rnode == NULL;
465}
466
467/*
468 * Remove zcache_rbnode from zpool->rbtree
469 *
 470 * holded_rblock - whether the caller already holds zpool->rb_lock
471 */
472static void zcache_rbnode_isolate(struct zcache_pool *zpool,
473 struct zcache_rbnode *rbnode, bool holded_rblock)
474{
475 unsigned long flags;
476
477 if (!holded_rblock)
478 write_lock_irqsave(&zpool->rb_lock, flags);
479 /*
480 * Someone may have taken a reference on this rbnode before we
481 * could acquire the write lock above.
482 * We only want to remove it from zpool->rbtree when the caller and
483 * the corresponding ratree hold the only references to this rbnode.
484 * The check below ensures that a racing zcache put will not end up
485 * adding a page to an isolated node and thereby losing that memory.
486 */
487 if (atomic_read(&rbnode->refcount.refcount) == 2) {
488 rb_erase(&rbnode->rb_node, &zpool->rbtree);
489 RB_CLEAR_NODE(&rbnode->rb_node);
490 kref_put(&rbnode->refcount, zcache_rbnode_release);
491 }
492 if (!holded_rblock)
493 write_unlock_irqrestore(&zpool->rb_lock, flags);
494}
495
496/*
 497 * Store a zaddr allocated by zbud_alloc() into the rbtree-ratree hierarchy.
498 */
499static int zcache_store_zaddr(struct zcache_pool *zpool,
500 int ra_index, int rb_index, unsigned long zaddr)
501{
502 unsigned long flags;
503 struct zcache_rbnode *rbnode, *tmp;
504 struct rb_node **link = NULL, *parent = NULL;
505 int ret;
506 void *dup_zaddr;
507
508 rbnode = zcache_find_get_rbnode(zpool, rb_index);
509 if (!rbnode) {
510 /* alloc and init a new rbnode */
511 rbnode = kmem_cache_alloc(zcache_rbnode_cache,
512 GFP_ZCACHE);
513 if (!rbnode)
514 return -ENOMEM;
515
516 INIT_RADIX_TREE(&rbnode->ratree, GFP_ATOMIC|__GFP_NOWARN);
517 spin_lock_init(&rbnode->ra_lock);
518 rbnode->rb_index = rb_index;
519 kref_init(&rbnode->refcount);
520 RB_CLEAR_NODE(&rbnode->rb_node);
521
522 /* add that rbnode to rbtree */
523 write_lock_irqsave(&zpool->rb_lock, flags);
524 tmp = zcache_find_rbnode(&zpool->rbtree, rb_index,
525 &parent, &link);
526 if (tmp) {
527 /* somebody else allocated new rbnode */
528 kmem_cache_free(zcache_rbnode_cache, rbnode);
529 rbnode = tmp;
530 } else {
531 rb_link_node(&rbnode->rb_node, parent, link);
532 rb_insert_color(&rbnode->rb_node, &zpool->rbtree);
533 }
534
535 /* Inc the reference of this zcache_rbnode */
536 kref_get(&rbnode->refcount);
537 write_unlock_irqrestore(&zpool->rb_lock, flags);
538 }
539
540 /* Successfully got a zcache_rbnode when arriving here */
541 spin_lock_irqsave(&rbnode->ra_lock, flags);
542 dup_zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
543 if (unlikely(dup_zaddr)) {
544 if (dup_zaddr == ZERO_HANDLE) {
545 atomic_dec(&zcache_stored_zero_pages);
546 } else {
547 zbud_free(zpool->pool, (unsigned long)dup_zaddr);
548 atomic_dec(&zcache_stored_pages);
549 zpool->size = zbud_get_pool_size(zpool->pool);
550 }
551 zcache_dup_entry++;
552 }
553
554 /* Insert zcache_ra_handle to ratree */
555 ret = radix_tree_insert(&rbnode->ratree, ra_index,
556 (void *)zaddr);
557 spin_unlock_irqrestore(&rbnode->ra_lock, flags);
558 if (unlikely(ret)) {
559 write_lock_irqsave(&zpool->rb_lock, flags);
560 spin_lock(&rbnode->ra_lock);
561
562 if (zcache_rbnode_empty(rbnode))
563 zcache_rbnode_isolate(zpool, rbnode, 1);
564
565 spin_unlock(&rbnode->ra_lock);
566 write_unlock_irqrestore(&zpool->rb_lock, flags);
567 }
568
569 kref_put(&rbnode->refcount, zcache_rbnode_release);
570 return ret;
571}
572
573/*
574 * Load zaddr and delete it from radix tree.
575 * If the radix tree of the corresponding rbnode is empty, delete the rbnode
576 * from zpool->rbtree also.
577 */
578static void *zcache_load_delete_zaddr(struct zcache_pool *zpool,
579 int rb_index, int ra_index)
580{
581 struct zcache_rbnode *rbnode;
582 void *zaddr = NULL;
583 unsigned long flags;
584
585 rbnode = zcache_find_get_rbnode(zpool, rb_index);
586 if (!rbnode)
587 goto out;
588
589 BUG_ON(rbnode->rb_index != rb_index);
590
591 spin_lock_irqsave(&rbnode->ra_lock, flags);
592 zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
593 spin_unlock_irqrestore(&rbnode->ra_lock, flags);
594
595 /* rb_lock and ra_lock must be taken again in the given sequence */
596 write_lock_irqsave(&zpool->rb_lock, flags);
597 spin_lock(&rbnode->ra_lock);
598 if (zcache_rbnode_empty(rbnode))
599 zcache_rbnode_isolate(zpool, rbnode, 1);
600 spin_unlock(&rbnode->ra_lock);
601 write_unlock_irqrestore(&zpool->rb_lock, flags);
602
603 kref_put(&rbnode->refcount, zcache_rbnode_release);
604out:
605 return zaddr;
606}
607
608static bool zero_page(struct page *page)
609{
610 unsigned long *ptr = kmap_atomic(page);
611 int i;
612 bool ret = false;
613
614 for (i = 0; i < PAGE_SIZE / sizeof(*ptr); i++) {
615 if (ptr[i])
616 goto out;
617 }
618 ret = true;
619out:
620 kunmap_atomic(ptr);
621 return ret;
622}
623
624static void zcache_store_page(int pool_id, struct cleancache_filekey key,
625 pgoff_t index, struct page *page)
626{
627 struct zcache_ra_handle *zhandle;
628 u8 *zpage, *src, *dst;
629 /* Address of zhandle + compressed data(zpage) */
630 unsigned long zaddr = 0;
631 unsigned int zlen = PAGE_SIZE;
632 bool zero = false;
633 int ret;
634
635 struct zcache_pool *zpool = zcache.pools[pool_id];
636
637 /*
638 * Zcache would be ineffective if the compressed memory pool were full
639 * of compressed inactive file pages, most of which would never be
640 * used again.
641 * So we refuse to compress pages that are not on the active file list.
642 */
643 if (!PageWasActive(page)) {
644 zcache_inactive_pages_refused++;
645 return;
646 }
647
648 zero = zero_page(page);
649 if (zero)
650 goto zero;
651
652 if (zcache_is_full()) {
653 zcache_pool_limit_hit++;
654 if (zbud_reclaim_page(zpool->pool, 8)) {
655 zcache_reclaim_fail++;
656 return;
657 }
658 /*
659 * Continue if a page frame was reclaimed successfully.
660 */
661 zcache_evict_filepages++;
662 zpool->size = zbud_get_pool_size(zpool->pool);
663 }
664
665 /* compress */
666 dst = get_cpu_var(zcache_dstmem);
667 src = kmap_atomic(page);
668 ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, src, PAGE_SIZE, dst,
669 &zlen);
670 kunmap_atomic(src);
671 if (ret) {
672 pr_err("zcache compress error ret %d\n", ret);
673 put_cpu_var(zcache_dstmem);
674 return;
675 }
676
677 /* store zcache handle together with compressed page data */
678 ret = zbud_alloc(zpool->pool, zlen + sizeof(struct zcache_ra_handle),
679 GFP_ZCACHE, &zaddr);
680 if (ret) {
681 zcache_zbud_alloc_fail++;
682 put_cpu_var(zcache_dstmem);
683 return;
684 }
685
686 zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool, zaddr);
687
688 /* Compressed page data stored at the end of zcache_ra_handle */
689 zpage = (u8 *)(zhandle + 1);
690 memcpy(zpage, dst, zlen);
691 zbud_unmap(zpool->pool, zaddr);
692 put_cpu_var(zcache_dstmem);
693
694zero:
695 if (zero)
696 zaddr = (unsigned long)ZERO_HANDLE;
697
698 /* store zcache handle */
699 ret = zcache_store_zaddr(zpool, index, key.u.ino, zaddr);
700 if (ret) {
701 zcache_store_failed++;
702 if (!zero)
703 zbud_free(zpool->pool, zaddr);
704 return;
705 }
706
707 /* update stats */
708 if (zero) {
709 atomic_inc(&zcache_stored_zero_pages);
710 } else {
711 zhandle->ra_index = index;
712 zhandle->rb_index = key.u.ino;
713 zhandle->zlen = zlen;
714 zhandle->zpool = zpool;
715 atomic_inc(&zcache_stored_pages);
716 zpool->size = zbud_get_pool_size(zpool->pool);
717 }
718
719 return;
720}
721
722static int zcache_load_page(int pool_id, struct cleancache_filekey key,
723 pgoff_t index, struct page *page)
724{
725 int ret = 0;
726 u8 *src, *dst;
727 void *zaddr;
728 unsigned int dlen = PAGE_SIZE;
729 struct zcache_ra_handle *zhandle;
730 struct zcache_pool *zpool = zcache.pools[pool_id];
731
732 zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
733 if (!zaddr)
734 return -ENOENT;
735 else if (zaddr == ZERO_HANDLE)
736 goto map;
737
738 zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool,
739 (unsigned long)zaddr);
740 /* Compressed page data stored at the end of zcache_ra_handle */
741 src = (u8 *)(zhandle + 1);
742
743 /* decompress */
744map:
745 dst = kmap_atomic(page);
746 if (zaddr != ZERO_HANDLE) {
747 ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, src,
748 zhandle->zlen, dst, &dlen);
749 } else {
750 memset(dst, 0, PAGE_SIZE);
751 kunmap_atomic(dst);
752 flush_dcache_page(page);
753 atomic_dec(&zcache_stored_zero_pages);
754 goto out;
755 }
756 kunmap_atomic(dst);
757 zbud_unmap(zpool->pool, (unsigned long)zaddr);
758 zbud_free(zpool->pool, (unsigned long)zaddr);
759
760 BUG_ON(ret);
761 BUG_ON(dlen != PAGE_SIZE);
762
763 /* update stats */
764 atomic_dec(&zcache_stored_pages);
765 zpool->size = zbud_get_pool_size(zpool->pool);
766out:
767 SetPageWasActive(page);
768 return ret;
769}
770
771static void zcache_flush_page(int pool_id, struct cleancache_filekey key,
772 pgoff_t index)
773{
774 struct zcache_pool *zpool = zcache.pools[pool_id];
775 void *zaddr = NULL;
776
777 zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
778 if (zaddr && (zaddr != ZERO_HANDLE)) {
779 zbud_free(zpool->pool, (unsigned long)zaddr);
780 atomic_dec(&zcache_stored_pages);
781 zpool->size = zbud_get_pool_size(zpool->pool);
782 } else if (zaddr == ZERO_HANDLE) {
783 atomic_dec(&zcache_stored_zero_pages);
784 }
785}
786
787#define FREE_BATCH 16
788/*
 789 * The caller must hold rbnode->ra_lock
790 */
791static void zcache_flush_ratree(struct zcache_pool *zpool,
792 struct zcache_rbnode *rbnode)
793{
794 unsigned long index = 0;
795 int count, i;
796 struct zcache_ra_handle *zhandle;
797 void *zaddr = NULL;
798
799 do {
800 void *zaddrs[FREE_BATCH];
801 unsigned long indices[FREE_BATCH];
802
803 count = radix_tree_gang_lookup_index(&rbnode->ratree,
804 (void **)zaddrs, indices,
805 index, FREE_BATCH);
806
807 for (i = 0; i < count; i++) {
808 if (zaddrs[i] == ZERO_HANDLE) {
809 zaddr = radix_tree_delete(&rbnode->ratree,
810 indices[i]);
811 if (zaddr)
812 atomic_dec(&zcache_stored_zero_pages);
813 continue;
814 }
815 zhandle = (struct zcache_ra_handle *)zbud_map(
816 zpool->pool, (unsigned long)zaddrs[i]);
817 index = zhandle->ra_index;
818 zaddr = radix_tree_delete(&rbnode->ratree, index);
819 if (!zaddr)
820 continue;
821 zbud_unmap(zpool->pool, (unsigned long)zaddrs[i]);
822 zbud_free(zpool->pool, (unsigned long)zaddrs[i]);
823 atomic_dec(&zcache_stored_pages);
824 zpool->size = zbud_get_pool_size(zpool->pool);
825 }
826
827 index++;
828 } while (count == FREE_BATCH);
829}
830
831static void zcache_flush_inode(int pool_id, struct cleancache_filekey key)
832{
833 struct zcache_rbnode *rbnode;
834 unsigned long flags1, flags2;
835 struct zcache_pool *zpool = zcache.pools[pool_id];
836
837 /*
838 * Prevent new pages from being added to the same rbnode, so take
839 * rb_lock first.
840 */
841 write_lock_irqsave(&zpool->rb_lock, flags1);
842 rbnode = zcache_find_rbnode(&zpool->rbtree, key.u.ino, 0, 0);
843 if (!rbnode) {
844 write_unlock_irqrestore(&zpool->rb_lock, flags1);
845 return;
846 }
847
848 kref_get(&rbnode->refcount);
849 spin_lock_irqsave(&rbnode->ra_lock, flags2);
850
851 zcache_flush_ratree(zpool, rbnode);
852 if (zcache_rbnode_empty(rbnode))
853 /* We already hold rb_lock when arriving here */
854 zcache_rbnode_isolate(zpool, rbnode, 1);
855
856 spin_unlock_irqrestore(&rbnode->ra_lock, flags2);
857 write_unlock_irqrestore(&zpool->rb_lock, flags1);
858 kref_put(&rbnode->refcount, zcache_rbnode_release);
859}
860
861static void zcache_destroy_pool(struct zcache_pool *zpool);
862static void zcache_flush_fs(int pool_id)
863{
864 struct zcache_rbnode *z_rbnode = NULL;
865 struct rb_node *rbnode;
866 unsigned long flags1, flags2;
867 struct zcache_pool *zpool;
868
869 if (pool_id < 0)
870 return;
871
872 zpool = zcache.pools[pool_id];
873 if (!zpool)
874 return;
875
876 /*
877 * Prevent new pages from being added, so take rb_lock first.
878 */
879 write_lock_irqsave(&zpool->rb_lock, flags1);
880
881 rbnode = rb_first(&zpool->rbtree);
882 while (rbnode) {
883 z_rbnode = rb_entry(rbnode, struct zcache_rbnode, rb_node);
884 rbnode = rb_next(rbnode);
885 if (z_rbnode) {
886 kref_get(&z_rbnode->refcount);
887 spin_lock_irqsave(&z_rbnode->ra_lock, flags2);
888 zcache_flush_ratree(zpool, z_rbnode);
889 if (zcache_rbnode_empty(z_rbnode))
890 zcache_rbnode_isolate(zpool, z_rbnode, 1);
891 spin_unlock_irqrestore(&z_rbnode->ra_lock, flags2);
892 kref_put(&z_rbnode->refcount, zcache_rbnode_release);
893 }
894 }
895
896 write_unlock_irqrestore(&zpool->rb_lock, flags1);
897 zcache_destroy_pool(zpool);
898}
899
900/*
901 * Evict compressed pages from zcache pool on an LRU basis after the compressed
902 * pool is full.
903 */
904static int zcache_evict_zpage(struct zbud_pool *pool, unsigned long zaddr)
905{
906 struct zcache_pool *zpool;
907 struct zcache_ra_handle *zhandle;
908 void *zaddr_intree;
909
910 BUG_ON(zaddr == (unsigned long)ZERO_HANDLE);
911
912 zhandle = (struct zcache_ra_handle *)zbud_map(pool, zaddr);
913
914 zpool = zhandle->zpool;
915 /* There can be a race with zcache store */
916 if (!zpool)
917 return -EINVAL;
918
919 BUG_ON(pool != zpool->pool);
920
921 zaddr_intree = zcache_load_delete_zaddr(zpool, zhandle->rb_index,
922 zhandle->ra_index);
923 if (zaddr_intree) {
924 BUG_ON((unsigned long)zaddr_intree != zaddr);
925 zbud_unmap(pool, zaddr);
926 zbud_free(pool, zaddr);
927 atomic_dec(&zcache_stored_pages);
928 zpool->size = zbud_get_pool_size(pool);
929 zcache_evict_zpages++;
930 }
931 return 0;
932}
933
934static struct zbud_ops zcache_zbud_ops = {
935 .evict = zcache_evict_zpage
936};
937
938/* Return pool id */
939static int zcache_create_pool(void)
940{
941 int ret;
942 struct zcache_pool *zpool;
943
944 zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
945 if (!zpool) {
946 ret = -ENOMEM;
947 goto out;
948 }
949
950 zpool->pool = zbud_create_pool(GFP_KERNEL, &zcache_zbud_ops);
951 if (!zpool->pool) {
952 kfree(zpool);
953 ret = -ENOMEM;
954 goto out;
955 }
956
957 spin_lock(&zcache.pool_lock);
958 if (zcache.num_pools == MAX_ZCACHE_POOLS) {
959 pr_err("Cannot create new pool (limit:%u)\n", MAX_ZCACHE_POOLS);
960 zbud_destroy_pool(zpool->pool);
961 kfree(zpool);
962 ret = -EPERM;
963 goto out_unlock;
964 }
965
966 rwlock_init(&zpool->rb_lock);
967 zpool->rbtree = RB_ROOT;
968 /* Add to pool list */
969 for (ret = 0; ret < MAX_ZCACHE_POOLS; ret++)
970 if (!zcache.pools[ret])
971 break;
972 zcache.pools[ret] = zpool;
973 zcache.num_pools++;
974 pr_info("New pool created id:%d\n", ret);
975
976out_unlock:
977 spin_unlock(&zcache.pool_lock);
978out:
979 return ret;
980}
981
982static void zcache_destroy_pool(struct zcache_pool *zpool)
983{
984 int i;
985
986 if (!zpool)
987 return;
988
989 spin_lock(&zcache.pool_lock);
990 zcache.num_pools--;
991 for (i = 0; i < MAX_ZCACHE_POOLS; i++)
992 if (zcache.pools[i] == zpool)
993 break;
994 zcache.pools[i] = NULL;
995 spin_unlock(&zcache.pool_lock);
996
997 if (!RB_EMPTY_ROOT(&zpool->rbtree))
998 WARN(1, "Memory leak detected. Freeing non-empty pool!\n");
999
1000 zbud_destroy_pool(zpool->pool);
1001 kfree(zpool);
1002}
1003
1004static int zcache_init_fs(size_t pagesize)
1005{
1006 int ret;
1007
1008 if (pagesize != PAGE_SIZE) {
1009 pr_info("Unsupported page size: %zu\n", pagesize);
1010 ret = -EINVAL;
1011 goto out;
1012 }
1013
1014 ret = zcache_create_pool();
1015 if (ret < 0) {
1016 pr_info("Failed to create new pool\n");
1017 ret = -ENOMEM;
1018 goto out;
1019 }
1020out:
1021 return ret;
1022}
1023
1024static int zcache_init_shared_fs(char *uuid, size_t pagesize)
1025{
1026 /* shared pools are unsupported and map to private */
1027 return zcache_init_fs(pagesize);
1028}
1029
1030static struct cleancache_ops zcache_ops = {
1031 .put_page = zcache_store_page,
1032 .get_page = zcache_load_page,
1033 .invalidate_page = zcache_flush_page,
1034 .invalidate_inode = zcache_flush_inode,
1035 .invalidate_fs = zcache_flush_fs,
1036 .init_shared_fs = zcache_init_shared_fs,
1037 .init_fs = zcache_init_fs
1038};
1039
1040/*
1041 * Debugfs functions
1042 */
1043#ifdef CONFIG_DEBUG_FS
1044#include <linux/debugfs.h>
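/*
 * Example of reading the statistics created below (assuming debugfs is
 * mounted at /sys/kernel/debug and zcache is enabled):
 *
 *	# cat /sys/kernel/debug/zcache/pool_pages
 *	# cat /sys/kernel/debug/zcache/stored_pages
 */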
1045
1046static int pool_pages_get(void *_data, u64 *val)
1047{
1048 *val = zcache_pages();
1049 return 0;
1050}
1051
1052DEFINE_SIMPLE_ATTRIBUTE(pool_page_fops, pool_pages_get, NULL, "%llu\n");
1053
1054static struct dentry *zcache_debugfs_root;
1055
1056static int __init zcache_debugfs_init(void)
1057{
1058 if (!debugfs_initialized())
1059 return -ENODEV;
1060
1061 zcache_debugfs_root = debugfs_create_dir("zcache", NULL);
1062 if (!zcache_debugfs_root)
1063 return -ENOMEM;
1064
1065 debugfs_create_u64("pool_limit_hit", S_IRUGO, zcache_debugfs_root,
1066 &zcache_pool_limit_hit);
1067 debugfs_create_u64("reject_alloc_fail", S_IRUGO, zcache_debugfs_root,
1068 &zcache_zbud_alloc_fail);
1069 debugfs_create_u64("duplicate_entry", S_IRUGO, zcache_debugfs_root,
1070 &zcache_dup_entry);
1071 debugfs_create_file("pool_pages", S_IRUGO, zcache_debugfs_root, NULL,
1072 &pool_page_fops);
1073 debugfs_create_atomic_t("stored_pages", S_IRUGO, zcache_debugfs_root,
1074 &zcache_stored_pages);
1075 debugfs_create_atomic_t("stored_zero_pages", S_IRUGO,
1076 zcache_debugfs_root, &zcache_stored_zero_pages);
1077 debugfs_create_u64("evicted_zpages", S_IRUGO, zcache_debugfs_root,
1078 &zcache_evict_zpages);
1079 debugfs_create_u64("evicted_filepages", S_IRUGO, zcache_debugfs_root,
1080 &zcache_evict_filepages);
1081 debugfs_create_u64("reclaim_fail", S_IRUGO, zcache_debugfs_root,
1082 &zcache_reclaim_fail);
1083 debugfs_create_u64("inactive_pages_refused", S_IRUGO,
1084 zcache_debugfs_root, &zcache_inactive_pages_refused);
1085 debugfs_create_u64("pool_shrink_count", S_IRUGO,
1086 zcache_debugfs_root, &zcache_pool_shrink);
1087 debugfs_create_u64("pool_shrink_fail", S_IRUGO,
1088 zcache_debugfs_root, &zcache_pool_shrink_fail);
1089 debugfs_create_u64("pool_shrink_pages", S_IRUGO,
1090 zcache_debugfs_root, &zcache_pool_shrink_pages);
1091 debugfs_create_u64("store_fail", S_IRUGO,
1092 zcache_debugfs_root, &zcache_store_failed);
1093 return 0;
1094}
1095
1096static void __exit zcache_debugfs_exit(void)
1097{
1098 debugfs_remove_recursive(zcache_debugfs_root);
1099}
1100#else
1101static int __init zcache_debugfs_init(void)
1102{
1103 return 0;
1104}
1105static void __exit zcache_debugfs_exit(void)
1106{
1107}
1108#endif
1109
1110/*
1111 * zcache init and exit
1112 */
1113static int __init init_zcache(void)
1114{
1115 if (!zcache_enabled)
1116 return 0;
1117
1118 pr_info("loading zcache..\n");
1119 if (zcache_rbnode_cache_create()) {
1120 pr_err("entry cache creation failed\n");
1121 goto error;
1122 }
1123
1124 if (zcache_comp_init()) {
1125 pr_err("compressor initialization failed\n");
1126 goto compfail;
1127 }
1128 if (zcache_cpu_init()) {
1129 pr_err("per-cpu initialization failed\n");
1130 goto pcpufail;
1131 }
1132
1133 spin_lock_init(&zcache.pool_lock);
1134 cleancache_register_ops(&zcache_ops);
1135
1136 if (zcache_debugfs_init())
1137 pr_warn("debugfs initialization failed\n");
1138 register_shrinker(&zcache_shrinker);
1139 return 0;
1140pcpufail:
1141 zcache_comp_exit();
1142compfail:
1143 zcache_rbnode_cache_destroy();
1144error:
1145 return -ENOMEM;
1146}
1147
1148/* must be late so crypto has time to come up */
1149late_initcall(init_zcache);
1150
1151MODULE_LICENSE("GPL");
1152MODULE_AUTHOR("Bob Liu <bob.liu@xxxxxxxxxx>");
1153MODULE_DESCRIPTION("Compressed cache for clean file pages");
1154