/*
 * zsmalloc memory allocator
 *
 * Copyright (C) 2011 Nitin Gupta
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the license that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */


/*
 * This allocator is designed for use with zcache and zram. Thus, the
 * allocator is supposed to work well under low memory conditions. In
 * particular, it never attempts higher order page allocation which is
 * very likely to fail under memory pressure. On the other hand, if we
 * just use single (0-order) pages, it would suffer from very high
 * fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy
 * an entire page. This was one of the major issues with its predecessor
 * (xvmalloc).
 *
 * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
 * and links them together using various 'struct page' fields. These linked
 * pages act as a single higher-order page i.e. an object can span 0-order
 * page boundaries. The code refers to these linked pages as a single entity
 * called zspage.
 *
 * Following is how we use various fields and flags of underlying
 * struct page(s) to form a zspage.
 *
 * Usage of struct page fields:
 *        page->first_page: points to the first component (0-order) page
 *        page->index (union with page->freelist): offset of the first object
 *                starting in this page. For the first page, this is
 *                always 0, so we use this field (aka freelist) to point
 *                to the first free object in zspage.
 *        page->lru: links together all component pages (except the first page)
 *                of a zspage
 *
 *        For _first_ page only:
 *
 *        page->private (union with page->first_page): refers to the
 *                component page after the first page
 *        page->freelist: points to the first free object in zspage.
 *                Free objects are linked together using in-place
 *                metadata.
 *        page->objects: maximum number of objects we can store in this
 *                zspage (class->pages_per_zspage * PAGE_SIZE / class->size)
 *        page->lru: links together first pages of various zspages.
 *                Basically forming list of zspages in a fullness group.
 *        page->mapping: class index and fullness group of the zspage
 *
 * Usage of struct page flags:
 *        PG_private: identifies the first component page
 *        PG_private2: identifies the last component page
 *
 */
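
/*
 * Illustrative sketch of the layout described above (not a definition used
 * by the code): assume a zspage built from three 0-order pages P0, P1, P2
 * for some size class whose objects straddle page boundaries.
 *
 *        P0 (first page): PG_private set, ->private = P1,
 *                ->freelist = handle of the first free object,
 *                ->objects/->inuse = capacity/usage counters,
 *                ->mapping encodes <class index, fullness group>
 *        P1: ->first_page = P0, ->index = offset of the first object
 *                that starts in P1; chained to P2 via ->lru
 *        P2 (last page): PG_private2 set, ->first_page = P0,
 *                ->index = offset of the first object that starts in P2
 */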

#ifdef CONFIG_ZSMALLOC_DEBUG
#define DEBUG
#endif

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>

#include "zsmalloc.h"
#include "zsmalloc_int.h"

/*
 * A zspage's class index and fullness group
 * are encoded in its (first)page->mapping
 */
#define CLASS_IDX_BITS        28
#define FULLNESS_BITS        4
#define CLASS_IDX_MASK        ((1 << CLASS_IDX_BITS) - 1)
#define FULLNESS_MASK        ((1 << FULLNESS_BITS) - 1)
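
/*
 * Worked example of the encoding above (values are illustrative only):
 * for class_idx = 5 and a fullness group that encodes to 2,
 *
 *        m = (5 << FULLNESS_BITS) | 2 = (5 << 4) | 2 = 0x52
 *
 * and decoding recovers fullness = 0x52 & FULLNESS_MASK = 2 and
 * class_idx = (0x52 >> FULLNESS_BITS) & CLASS_IDX_MASK = 5, matching
 * set_zspage_mapping() and get_zspage_mapping() below.
 */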

/*
 * By default, zsmalloc uses a copy-based object mapping method to access
 * allocations that span two pages. However, if a particular architecture
 * 1) implements local_flush_tlb_kernel_range() and 2) performs VM mapping
 * faster than copying, then it should be added here so that
 * USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use page table
 * mapping rather than copying for object mapping.
 */
#if defined(CONFIG_ARM)
#define USE_PGTABLE_MAPPING
#endif

struct mapping_area {
#ifdef USE_PGTABLE_MAPPING
        struct vm_struct *vm; /* vm area for mapping objects that span pages */
#else
        char *vm_buf; /* copy buffer for objects that span pages */
#endif
        char *vm_addr; /* address of kmap_atomic()'ed pages */
        enum zs_mapmode vm_mm; /* mapping mode */
};


/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);

static int is_first_page(struct page *page)
{
        return PagePrivate(page);
}

static int is_last_page(struct page *page)
{
        return PagePrivate2(page);
}

static void get_zspage_mapping(struct page *page, unsigned int *class_idx,
                                enum fullness_group *fullness)
{
        unsigned long m;
        BUG_ON(!is_first_page(page));

        m = (unsigned long)page->mapping;
        *fullness = m & FULLNESS_MASK;
        *class_idx = (m >> FULLNESS_BITS) & CLASS_IDX_MASK;
}

static void set_zspage_mapping(struct page *page, unsigned int class_idx,
                                enum fullness_group fullness)
{
        unsigned long m;
        BUG_ON(!is_first_page(page));

        m = ((class_idx & CLASS_IDX_MASK) << FULLNESS_BITS) |
                        (fullness & FULLNESS_MASK);
        page->mapping = (struct address_space *)m;
}

static int get_size_class_index(int size)
{
        int idx = 0;

        if (likely(size > ZS_MIN_ALLOC_SIZE))
                idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
                                ZS_SIZE_CLASS_DELTA);

        return idx;
}

static enum fullness_group get_fullness_group(struct page *page)
{
        int inuse, max_objects;
        enum fullness_group fg;
        BUG_ON(!is_first_page(page));

        inuse = page->inuse;
        max_objects = page->objects;

        if (inuse == 0)
                fg = ZS_EMPTY;
        else if (inuse == max_objects)
                fg = ZS_FULL;
        else if (inuse <= max_objects / fullness_threshold_frac)
                fg = ZS_ALMOST_EMPTY;
        else
                fg = ZS_ALMOST_FULL;

        return fg;
}

static void insert_zspage(struct page *page, struct size_class *class,
                                enum fullness_group fullness)
{
        struct page **head;

        BUG_ON(!is_first_page(page));

        if (fullness >= _ZS_NR_FULLNESS_GROUPS)
                return;

        head = &class->fullness_list[fullness];
        if (*head)
                list_add_tail(&page->lru, &(*head)->lru);

        *head = page;
}

static void remove_zspage(struct page *page, struct size_class *class,
                                enum fullness_group fullness)
{
        struct page **head;

        BUG_ON(!is_first_page(page));

        if (fullness >= _ZS_NR_FULLNESS_GROUPS)
                return;

        head = &class->fullness_list[fullness];
        BUG_ON(!*head);
        if (list_empty(&(*head)->lru))
                *head = NULL;
        else if (*head == page)
                *head = (struct page *)list_entry((*head)->lru.next,
                                        struct page, lru);

        list_del_init(&page->lru);
}

static enum fullness_group fix_fullness_group(struct zs_pool *pool,
                                                struct page *page)
{
        int class_idx;
        struct size_class *class;
        enum fullness_group currfg, newfg;

        BUG_ON(!is_first_page(page));

        get_zspage_mapping(page, &class_idx, &currfg);
        newfg = get_fullness_group(page);
        if (newfg == currfg)
                goto out;

        class = &pool->size_class[class_idx];
        remove_zspage(page, class, currfg);
        insert_zspage(page, class, newfg);
        set_zspage_mapping(page, class_idx, newfg);

out:
        return newfg;
}

/*
 * We have to decide on how many pages to link together
 * to form a zspage for each size class. This is important
 * to reduce wastage due to unusable space left at the end of
 * each zspage, which is given as:
 *        wastage = Zp % size_class
 * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...
 *
 * For example, for a size class of 3/8 * PAGE_SIZE, we should
 * link together 3 PAGE_SIZE sized pages to form a zspage
 * since then we can perfectly fit in 8 such objects.
 */
static int get_pages_per_zspage(int class_size)
{
        int i, max_usedpc = 0;
        /* zspage order which gives maximum used size per KB */
        int max_usedpc_order = 1;

        for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {
                int zspage_size;
                int waste, usedpc;

                zspage_size = i * PAGE_SIZE;
                waste = zspage_size % class_size;
                usedpc = (zspage_size - waste) * 100 / zspage_size;

                if (usedpc > max_usedpc) {
                        max_usedpc = usedpc;
                        max_usedpc_order = i;
                }
        }

        return max_usedpc_order;
}
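
/*
 * Worked example for get_pages_per_zspage() above, assuming
 * PAGE_SIZE == 4096 and class_size == 1536 (the 3/8 * PAGE_SIZE class
 * from the comment):
 *
 *        i = 1: waste = 4096  % 1536 = 1024  ->  usedpc = 75
 *        i = 2: waste = 8192  % 1536 =  512  ->  usedpc = 93
 *        i = 3: waste = 12288 % 1536 =    0  ->  usedpc = 100
 *
 * so 3 pages per zspage would be chosen for this class, provided
 * ZS_MAX_PAGES_PER_ZSPAGE (from zsmalloc_int.h) allows it.
 */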

/*
 * A single 'zspage' is composed of many system pages which are
 * linked together using fields in struct page. This function finds
 * the first/head page, given any component page of a zspage.
 */
static struct page *get_first_page(struct page *page)
{
        if (is_first_page(page))
                return page;
        else
                return page->first_page;
}

static struct page *get_next_page(struct page *page)
{
        struct page *next;

        if (is_last_page(page))
                next = NULL;
        else if (is_first_page(page))
                next = (struct page *)page->private;
        else
                next = list_entry(page->lru.next, struct page, lru);

        return next;
}

/* Encode <page, obj_idx> as a single handle value */
static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
{
        unsigned long handle;

        if (!page) {
                BUG_ON(obj_idx);
                return NULL;
        }

        handle = page_to_pfn(page) << OBJ_INDEX_BITS;
        handle |= (obj_idx & OBJ_INDEX_MASK);

        return (void *)handle;
}
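
/*
 * Worked example for obj_location_to_handle() above and
 * obj_handle_to_location() below. OBJ_INDEX_BITS comes from
 * zsmalloc_int.h; assuming it were 16 and OBJ_INDEX_MASK were 0xffff,
 * a page with pfn 0x1234 and obj_idx 5 would encode as
 *
 *        handle = (0x1234 << 16) | 5 = 0x12340005
 *
 * and decode back to pfn = handle >> 16 = 0x1234 and
 * obj_idx = handle & 0xffff = 5.
 */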

/* Decode <page, obj_idx> pair from the given object handle */
static void obj_handle_to_location(unsigned long handle, struct page **page,
                                unsigned long *obj_idx)
{
        *page = pfn_to_page(handle >> OBJ_INDEX_BITS);
        *obj_idx = handle & OBJ_INDEX_MASK;
}

static unsigned long obj_idx_to_offset(struct page *page,
                                unsigned long obj_idx, int class_size)
{
        unsigned long off = 0;

        if (!is_first_page(page))
                off = page->index;

        return off + obj_idx * class_size;
}

static void reset_page(struct page *page)
{
        clear_bit(PG_private, &page->flags);
        clear_bit(PG_private_2, &page->flags);
        set_page_private(page, 0);
        page->mapping = NULL;
        page->freelist = NULL;
        reset_page_mapcount(page);
}

static void free_zspage(struct page *first_page)
{
        struct page *nextp, *tmp, *head_extra;

        BUG_ON(!is_first_page(first_page));
        BUG_ON(first_page->inuse);

        head_extra = (struct page *)page_private(first_page);

        reset_page(first_page);
        __free_page(first_page);

        /* zspage with only 1 system page */
        if (!head_extra)
                return;

        list_for_each_entry_safe(nextp, tmp, &head_extra->lru, lru) {
                list_del(&nextp->lru);
                reset_page(nextp);
                __free_page(nextp);
        }
        reset_page(head_extra);
        __free_page(head_extra);
}

/* Initialize a newly allocated zspage */
static void init_zspage(struct page *first_page, struct size_class *class)
{
        unsigned long off = 0;
        struct page *page = first_page;

        BUG_ON(!is_first_page(first_page));
        while (page) {
                struct page *next_page;
                struct link_free *link;
                unsigned int i, objs_on_page;

                /*
                 * page->index stores offset of first object starting
                 * in the page. For the first page, this is always 0,
                 * so we use first_page->index (aka ->freelist) to store
                 * head of corresponding zspage's freelist.
                 */
                if (page != first_page)
                        page->index = off;

                link = (struct link_free *)kmap_atomic(page) +
                                                off / sizeof(*link);
                objs_on_page = (PAGE_SIZE - off) / class->size;

                for (i = 1; i <= objs_on_page; i++) {
                        off += class->size;
                        if (off < PAGE_SIZE) {
                                link->next = obj_location_to_handle(page, i);
                                link += class->size / sizeof(*link);
                        }
                }

                /*
                 * We now come to the last (full or partial) object on this
                 * page, which must point to the first object on the next
                 * page (if present)
                 */
                next_page = get_next_page(page);
                link->next = obj_location_to_handle(next_page, 0);
                kunmap_atomic(link);
                page = next_page;
                off = (off + class->size) % PAGE_SIZE;
        }
}

/*
 * Allocate a zspage for the given size class
 */
static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
{
        int i, error;
        struct page *first_page = NULL, *uninitialized_var(prev_page);

        /*
         * Allocate individual pages and link them together as:
         * 1. first page->private = first sub-page
         * 2. all sub-pages are linked together using page->lru
         * 3. each sub-page is linked to the first page using page->first_page
         *
         * For each size class, First/Head pages are linked together using
         * page->lru. Also, we set PG_private to identify the first page
         * (i.e. no other sub-page has this flag set) and PG_private_2 to
         * identify the last page.
         */
        error = -ENOMEM;
        for (i = 0; i < class->pages_per_zspage; i++) {
                struct page *page;

                page = alloc_page(flags);
                if (!page)
                        goto cleanup;

                INIT_LIST_HEAD(&page->lru);
                if (i == 0) { /* first page */
                        SetPagePrivate(page);
                        set_page_private(page, 0);
                        first_page = page;
                        first_page->inuse = 0;
                }
                if (i == 1)
                        first_page->private = (unsigned long)page;
                if (i >= 1)
                        page->first_page = first_page;
                if (i >= 2)
                        list_add(&page->lru, &prev_page->lru);
                if (i == class->pages_per_zspage - 1) /* last page */
                        SetPagePrivate2(page);
                prev_page = page;
        }

        init_zspage(first_page, class);

        first_page->freelist = obj_location_to_handle(first_page, 0);
        /* Maximum number of objects we can store in this zspage */
        first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;

        error = 0; /* Success */

cleanup:
        if (unlikely(error) && first_page) {
                free_zspage(first_page);
                first_page = NULL;
        }

        return first_page;
}

static struct page *find_get_zspage(struct size_class *class)
{
        int i;
        struct page *page;

        for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {
                page = class->fullness_list[i];
                if (page)
                        break;
        }

        return page;
}

#ifdef USE_PGTABLE_MAPPING
static inline int __zs_cpu_up(struct mapping_area *area)
{
        /*
         * Make sure we don't leak memory if a cpu UP notification
         * and zs_init() race and both call zs_cpu_up() on the same cpu
         */
        if (area->vm)
                return 0;
        area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
        if (!area->vm)
                return -ENOMEM;
        return 0;
}

static inline void __zs_cpu_down(struct mapping_area *area)
{
        if (area->vm)
                free_vm_area(area->vm);
        area->vm = NULL;
}

static inline void *__zs_map_object(struct mapping_area *area,
                                struct page *pages[2], int off, int size)
{
        BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
        area->vm_addr = area->vm->addr;
        return area->vm_addr + off;
}

static inline void __zs_unmap_object(struct mapping_area *area,
                                struct page *pages[2], int off, int size)
{
        unsigned long addr = (unsigned long)area->vm_addr;
        unsigned long end = addr + (PAGE_SIZE * 2);

        flush_cache_vunmap(addr, end);
        unmap_kernel_range_noflush(addr, PAGE_SIZE * 2);
        local_flush_tlb_kernel_range(addr, end);
}

#else /* USE_PGTABLE_MAPPING */

static inline int __zs_cpu_up(struct mapping_area *area)
{
        /*
         * Make sure we don't leak memory if a cpu UP notification
         * and zs_init() race and both call zs_cpu_up() on the same cpu
         */
        if (area->vm_buf)
                return 0;
        area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
        if (!area->vm_buf)
                return -ENOMEM;
        return 0;
}

static inline void __zs_cpu_down(struct mapping_area *area)
{
        if (area->vm_buf)
                free_page((unsigned long)area->vm_buf);
        area->vm_buf = NULL;
}

static void *__zs_map_object(struct mapping_area *area,
                        struct page *pages[2], int off, int size)
{
        int sizes[2];
        void *addr;
        char *buf = area->vm_buf;

        /* disable page faults to match kmap_atomic() return conditions */
        pagefault_disable();

        /* no read fastpath */
        if (area->vm_mm == ZS_MM_WO)
                goto out;

        sizes[0] = PAGE_SIZE - off;
        sizes[1] = size - sizes[0];

        /* copy object to per-cpu buffer */
        addr = kmap_atomic(pages[0]);
        memcpy(buf, addr + off, sizes[0]);
        kunmap_atomic(addr);
        addr = kmap_atomic(pages[1]);
        memcpy(buf + sizes[0], addr, sizes[1]);
        kunmap_atomic(addr);
out:
        return area->vm_buf;
}

static void __zs_unmap_object(struct mapping_area *area,
                        struct page *pages[2], int off, int size)
{
        int sizes[2];
        void *addr;
        char *buf = area->vm_buf;

        /* no write fastpath */
        if (area->vm_mm == ZS_MM_RO)
                goto out;

        sizes[0] = PAGE_SIZE - off;
        sizes[1] = size - sizes[0];

        /* copy per-cpu buffer to object */
        addr = kmap_atomic(pages[0]);
        memcpy(addr + off, buf, sizes[0]);
        kunmap_atomic(addr);
        addr = kmap_atomic(pages[1]);
        memcpy(addr, buf + sizes[0], sizes[1]);
        kunmap_atomic(addr);

out:
        /* enable page faults to match kunmap_atomic() return conditions */
        pagefault_enable();
}

#endif /* USE_PGTABLE_MAPPING */

static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
                                void *pcpu)
{
        int ret, cpu = (long)pcpu;
        struct mapping_area *area;

        switch (action) {
        case CPU_UP_PREPARE:
                area = &per_cpu(zs_map_area, cpu);
                ret = __zs_cpu_up(area);
                if (ret)
                        return notifier_from_errno(ret);
                break;
        case CPU_DEAD:
        case CPU_UP_CANCELED:
                area = &per_cpu(zs_map_area, cpu);
                __zs_cpu_down(area);
                break;
        }

        return NOTIFY_OK;
}

static struct notifier_block zs_cpu_nb = {
        .notifier_call = zs_cpu_notifier
};

static void zs_exit(void)
{
        int cpu;

        for_each_online_cpu(cpu)
                zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
        unregister_cpu_notifier(&zs_cpu_nb);
}

static int zs_init(void)
{
        int cpu, ret;

        register_cpu_notifier(&zs_cpu_nb);
        for_each_online_cpu(cpu) {
                ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
                if (notifier_to_errno(ret))
                        goto fail;
        }
        return 0;
fail:
        zs_exit();
        return notifier_to_errno(ret);
}

struct zs_pool *zs_create_pool(const char *name, gfp_t flags)
{
        int i, ovhd_size;
        struct zs_pool *pool;

        if (!name)
                return NULL;

        ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
        pool = kzalloc(ovhd_size, GFP_KERNEL);
        if (!pool)
                return NULL;

        for (i = 0; i < ZS_SIZE_CLASSES; i++) {
                int size;
                struct size_class *class;

                size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
                if (size > ZS_MAX_ALLOC_SIZE)
                        size = ZS_MAX_ALLOC_SIZE;

                class = &pool->size_class[i];
                class->size = size;
                class->index = i;
                spin_lock_init(&class->lock);
                class->pages_per_zspage = get_pages_per_zspage(size);
        }

        pool->flags = flags;
        pool->name = name;

        return pool;
}
EXPORT_SYMBOL_GPL(zs_create_pool);

void zs_destroy_pool(struct zs_pool *pool)
{
        int i;

        for (i = 0; i < ZS_SIZE_CLASSES; i++) {
                int fg;
                struct size_class *class = &pool->size_class[i];

                for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
                        if (class->fullness_list[fg]) {
                                pr_info("Freeing non-empty class with size "
                                        "%db, fullness group %d\n",
                                        class->size, fg);
                        }
                }
        }
        kfree(pool);
}
EXPORT_SYMBOL_GPL(zs_destroy_pool);

/**
 * zs_malloc - Allocate block of given size from pool.
 * @pool: pool to allocate from
 * @size: size of block to allocate
 *
 * On success, handle to the allocated object is returned,
 * otherwise 0.
 * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
 */
unsigned long zs_malloc(struct zs_pool *pool, size_t size)
{
        unsigned long obj;
        struct link_free *link;
        int class_idx;
        struct size_class *class;

        struct page *first_page, *m_page;
        unsigned long m_objidx, m_offset;

        if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
                return 0;

        class_idx = get_size_class_index(size);
        class = &pool->size_class[class_idx];
        BUG_ON(class_idx != class->index);

        spin_lock(&class->lock);
        first_page = find_get_zspage(class);

        if (!first_page) {
                spin_unlock(&class->lock);
                first_page = alloc_zspage(class, pool->flags);
                if (unlikely(!first_page))
                        return 0;

                set_zspage_mapping(first_page, class->index, ZS_EMPTY);
                spin_lock(&class->lock);
                class->pages_allocated += class->pages_per_zspage;
        }

        obj = (unsigned long)first_page->freelist;
        obj_handle_to_location(obj, &m_page, &m_objidx);
        m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);

        link = (struct link_free *)kmap_atomic(m_page) +
                                m_offset / sizeof(*link);
        first_page->freelist = link->next;
        memset(link, POISON_INUSE, sizeof(*link));
        kunmap_atomic(link);

        first_page->inuse++;
        /* Now move the zspage to another fullness group, if required */
        fix_fullness_group(pool, first_page);
        spin_unlock(&class->lock);

        return obj;
}
EXPORT_SYMBOL_GPL(zs_malloc);

void zs_free(struct zs_pool *pool, unsigned long obj)
{
        struct link_free *link;
        struct page *first_page, *f_page;
        unsigned long f_objidx, f_offset;

        int class_idx;
        struct size_class *class;
        enum fullness_group fullness;

        if (unlikely(!obj))
                return;

        obj_handle_to_location(obj, &f_page, &f_objidx);
        first_page = get_first_page(f_page);

        get_zspage_mapping(first_page, &class_idx, &fullness);
        class = &pool->size_class[class_idx];
        f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);

        spin_lock(&class->lock);

        /* Insert this object in containing zspage's freelist */
        link = (struct link_free *)((unsigned char *)kmap_atomic(f_page)
                                                        + f_offset);
        link->next = first_page->freelist;
        kunmap_atomic(link);
        first_page->freelist = (void *)obj;

        first_page->inuse--;
        fullness = fix_fullness_group(pool, first_page);

        if (fullness == ZS_EMPTY)
                class->pages_allocated -= class->pages_per_zspage;

        spin_unlock(&class->lock);

        if (fullness == ZS_EMPTY)
                free_zspage(first_page);
}
EXPORT_SYMBOL_GPL(zs_free);

/**
 * zs_map_object - get address of allocated object from handle.
 * @pool: pool from which the object was allocated
 * @handle: handle returned from zs_malloc
 * @mm: mapping mode to use
 *
 * Before using an object allocated from zs_malloc, it must be mapped using
 * this function. When done with the object, it must be unmapped using
 * zs_unmap_object.
 *
 * Only one object can be mapped per cpu at a time. There is no protection
 * against nested mappings.
 *
 * This function returns with preemption and page faults disabled.
 */
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
                        enum zs_mapmode mm)
{
        struct page *page;
        unsigned long obj_idx, off;

        unsigned int class_idx;
        enum fullness_group fg;
        struct size_class *class;
        struct mapping_area *area;
        struct page *pages[2];

        BUG_ON(!handle);

        /*
         * Because we use per-cpu mapping areas shared among the
         * pools/users, we can't allow mapping in interrupt context
         * because it can corrupt another user's mappings.
         */
        BUG_ON(in_interrupt());

        obj_handle_to_location(handle, &page, &obj_idx);
        get_zspage_mapping(get_first_page(page), &class_idx, &fg);
        class = &pool->size_class[class_idx];
        off = obj_idx_to_offset(page, obj_idx, class->size);

        area = &get_cpu_var(zs_map_area);
        area->vm_mm = mm;
        if (off + class->size <= PAGE_SIZE) {
                /* this object is contained entirely within a page */
                area->vm_addr = kmap_atomic(page);
                return area->vm_addr + off;
        }

        /* this object spans two pages */
        pages[0] = page;
        pages[1] = get_next_page(page);
        BUG_ON(!pages[1]);

        return __zs_map_object(area, pages, off, class->size);
}
EXPORT_SYMBOL_GPL(zs_map_object);

void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
{
        struct page *page;
        unsigned long obj_idx, off;

        unsigned int class_idx;
        enum fullness_group fg;
        struct size_class *class;
        struct mapping_area *area;

        BUG_ON(!handle);

        obj_handle_to_location(handle, &page, &obj_idx);
        get_zspage_mapping(get_first_page(page), &class_idx, &fg);
        class = &pool->size_class[class_idx];
        off = obj_idx_to_offset(page, obj_idx, class->size);

        area = &__get_cpu_var(zs_map_area);
        if (off + class->size <= PAGE_SIZE)
                kunmap_atomic(area->vm_addr);
        else {
                struct page *pages[2];

                pages[0] = page;
                pages[1] = get_next_page(page);
                BUG_ON(!pages[1]);

                __zs_unmap_object(area, pages, off, class->size);
        }
        put_cpu_var(zs_map_area);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);

u64 zs_get_total_size_bytes(struct zs_pool *pool)
{
        int i;
        u64 npages = 0;

        for (i = 0; i < ZS_SIZE_CLASSES; i++)
                npages += pool->size_class[i].pages_allocated;

        return npages << PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
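
/*
 * Minimal usage sketch of the exported API. This is illustrative only:
 * "example", src_buf and len are placeholders, error handling is omitted,
 * and the GFP flags are just one reasonable choice.
 *
 *        struct zs_pool *pool = zs_create_pool("example", GFP_NOIO);
 *        unsigned long handle = zs_malloc(pool, len); // len <= ZS_MAX_ALLOC_SIZE
 *        void *dst;
 *
 *        dst = zs_map_object(pool, handle, ZS_MM_WO);
 *        memcpy(dst, src_buf, len);
 *        zs_unmap_object(pool, handle); // mapping is per-cpu; unmap promptly
 *        ...
 *        zs_free(pool, handle);
 *        zs_destroy_pool(pool);
 */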

module_init(zs_init);
module_exit(zs_exit);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");