blob: 44ceb0b823a9ad1cc6fda93300c714e365d7f8e1 [file] [log] [blame]
Greg Kroah-Hartman00bf19f2012-05-16 08:09:35 -07001/*
2 * xvmalloc memory allocator
3 *
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
5 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 */
12
13#ifdef CONFIG_ZRAM_DEBUG
14#define DEBUG
15#endif
16
17#include <linux/module.h>
18#include <linux/kernel.h>
19#include <linux/bitops.h>
20#include <linux/errno.h>
21#include <linux/highmem.h>
22#include <linux/init.h>
23#include <linux/string.h>
24#include <linux/slab.h>
25
26#include "xvmalloc.h"
27#include "xvmalloc_int.h"
28
29static void stat_inc(u64 *value)
30{
31 *value = *value + 1;
32}
33
34static void stat_dec(u64 *value)
35{
36 *value = *value - 1;
37}
38
39static int test_flag(struct block_header *block, enum blockflags flag)
40{
41 return block->prev & BIT(flag);
42}
43
44static void set_flag(struct block_header *block, enum blockflags flag)
45{
46 block->prev |= BIT(flag);
47}
48
49static void clear_flag(struct block_header *block, enum blockflags flag)
50{
51 block->prev &= ~BIT(flag);
52}
53
54/*
55 * Given <page, offset> pair, provide a dereferencable pointer.
56 * This is called from xv_malloc/xv_free path, so it
57 * needs to be fast.
58 */
59static void *get_ptr_atomic(struct page *page, u16 offset)
60{
61 unsigned char *base;
62
63 base = kmap_atomic(page);
64 return base + offset;
65}
66
67static void put_ptr_atomic(void *ptr)
68{
69 kunmap_atomic(ptr);
70}
71
72static u32 get_blockprev(struct block_header *block)
73{
74 return block->prev & PREV_MASK;
75}
76
77static void set_blockprev(struct block_header *block, u16 new_offset)
78{
79 block->prev = new_offset | (block->prev & FLAGS_MASK);
80}
81
82static struct block_header *BLOCK_NEXT(struct block_header *block)
83{
84 return (struct block_header *)
85 ((char *)block + block->size + XV_ALIGN);
86}
87
88/*
89 * Get index of free list containing blocks of maximum size
90 * which is less than or equal to given size.
91 */
92static u32 get_index_for_insert(u32 size)
93{
94 if (unlikely(size > XV_MAX_ALLOC_SIZE))
95 size = XV_MAX_ALLOC_SIZE;
96 size &= ~FL_DELTA_MASK;
97 return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
98}
99
100/*
101 * Get index of free list having blocks of size greater than
102 * or equal to requested size.
103 */
104static u32 get_index(u32 size)
105{
106 if (unlikely(size < XV_MIN_ALLOC_SIZE))
107 size = XV_MIN_ALLOC_SIZE;
108 size = ALIGN(size, FL_DELTA);
109 return (size - XV_MIN_ALLOC_SIZE) >> FL_DELTA_SHIFT;
110}
111
112/**
113 * find_block - find block of at least given size
114 * @pool: memory pool to search from
115 * @size: size of block required
116 * @page: page containing required block
117 * @offset: offset within the page where block is located.
118 *
119 * Searches two level bitmap to locate block of at least
120 * the given size. If such a block is found, it provides
121 * <page, offset> to identify this block and returns index
122 * in freelist where we found this block.
123 * Otherwise, returns 0 and <page, offset> params are not touched.
124 */
125static u32 find_block(struct xv_pool *pool, u32 size,
126 struct page **page, u32 *offset)
127{
128 ulong flbitmap, slbitmap;
129 u32 flindex, slindex, slbitstart;
130
131 /* There are no free blocks in this pool */
132 if (!pool->flbitmap)
133 return 0;
134
135 /* Get freelist index corresponding to this size */
136 slindex = get_index(size);
137 slbitmap = pool->slbitmap[slindex / BITS_PER_LONG];
138 slbitstart = slindex % BITS_PER_LONG;
139
140 /*
141 * If freelist is not empty at this index, we found the
142 * block - head of this list. This is approximate best-fit match.
143 */
144 if (test_bit(slbitstart, &slbitmap)) {
145 *page = pool->freelist[slindex].page;
146 *offset = pool->freelist[slindex].offset;
147 return slindex;
148 }
149
150 /*
151 * No best-fit found. Search a bit further in bitmap for a free block.
152 * Second level bitmap consists of series of 32-bit chunks. Search
153 * further in the chunk where we expected a best-fit, starting from
154 * index location found above.
155 */
156 slbitstart++;
157 slbitmap >>= slbitstart;
158
159 /* Skip this search if we were already at end of this bitmap chunk */
160 if ((slbitstart != BITS_PER_LONG) && slbitmap) {
161 slindex += __ffs(slbitmap) + 1;
162 *page = pool->freelist[slindex].page;
163 *offset = pool->freelist[slindex].offset;
164 return slindex;
165 }
166
167 /* Now do a full two-level bitmap search to find next nearest fit */
168 flindex = slindex / BITS_PER_LONG;
169
170 flbitmap = (pool->flbitmap) >> (flindex + 1);
171 if (!flbitmap)
172 return 0;
173
174 flindex += __ffs(flbitmap) + 1;
175 slbitmap = pool->slbitmap[flindex];
176 slindex = (flindex * BITS_PER_LONG) + __ffs(slbitmap);
177 *page = pool->freelist[slindex].page;
178 *offset = pool->freelist[slindex].offset;
179
180 return slindex;
181}
182
183/*
184 * Insert block at <page, offset> in freelist of given pool.
185 * freelist used depends on block size.
186 */
187static void insert_block(struct xv_pool *pool, struct page *page, u32 offset,
188 struct block_header *block)
189{
190 u32 flindex, slindex;
191 struct block_header *nextblock;
192
193 slindex = get_index_for_insert(block->size);
194 flindex = slindex / BITS_PER_LONG;
195
196 block->link.prev_page = NULL;
197 block->link.prev_offset = 0;
198 block->link.next_page = pool->freelist[slindex].page;
199 block->link.next_offset = pool->freelist[slindex].offset;
200 pool->freelist[slindex].page = page;
201 pool->freelist[slindex].offset = offset;
202
203 if (block->link.next_page) {
204 nextblock = get_ptr_atomic(block->link.next_page,
205 block->link.next_offset);
206 nextblock->link.prev_page = page;
207 nextblock->link.prev_offset = offset;
208 put_ptr_atomic(nextblock);
209 /* If there was a next page then the free bits are set. */
210 return;
211 }
212
213 __set_bit(slindex % BITS_PER_LONG, &pool->slbitmap[flindex]);
214 __set_bit(flindex, &pool->flbitmap);
215}
216
217/*
218 * Remove block from freelist. Index 'slindex' identifies the freelist.
219 */
220static void remove_block(struct xv_pool *pool, struct page *page, u32 offset,
221 struct block_header *block, u32 slindex)
222{
223 u32 flindex = slindex / BITS_PER_LONG;
224 struct block_header *tmpblock;
225
226 if (block->link.prev_page) {
227 tmpblock = get_ptr_atomic(block->link.prev_page,
228 block->link.prev_offset);
229 tmpblock->link.next_page = block->link.next_page;
230 tmpblock->link.next_offset = block->link.next_offset;
231 put_ptr_atomic(tmpblock);
232 }
233
234 if (block->link.next_page) {
235 tmpblock = get_ptr_atomic(block->link.next_page,
236 block->link.next_offset);
237 tmpblock->link.prev_page = block->link.prev_page;
238 tmpblock->link.prev_offset = block->link.prev_offset;
239 put_ptr_atomic(tmpblock);
240 }
241
242 /* Is this block is at the head of the freelist? */
243 if (pool->freelist[slindex].page == page
244 && pool->freelist[slindex].offset == offset) {
245
246 pool->freelist[slindex].page = block->link.next_page;
247 pool->freelist[slindex].offset = block->link.next_offset;
248
249 if (pool->freelist[slindex].page) {
250 struct block_header *tmpblock;
251 tmpblock = get_ptr_atomic(pool->freelist[slindex].page,
252 pool->freelist[slindex].offset);
253 tmpblock->link.prev_page = NULL;
254 tmpblock->link.prev_offset = 0;
255 put_ptr_atomic(tmpblock);
256 } else {
257 /* This freelist bucket is empty */
258 __clear_bit(slindex % BITS_PER_LONG,
259 &pool->slbitmap[flindex]);
260 if (!pool->slbitmap[flindex])
261 __clear_bit(flindex, &pool->flbitmap);
262 }
263 }
264
265 block->link.prev_page = NULL;
266 block->link.prev_offset = 0;
267 block->link.next_page = NULL;
268 block->link.next_offset = 0;
269}
270
271/*
272 * Allocate a page and add it to freelist of given pool.
273 */
274static int grow_pool(struct xv_pool *pool, gfp_t flags)
275{
276 struct page *page;
277 struct block_header *block;
278
279 page = alloc_page(flags);
280 if (unlikely(!page))
281 return -ENOMEM;
282
283 stat_inc(&pool->total_pages);
284
285 spin_lock(&pool->lock);
286 block = get_ptr_atomic(page, 0);
287
288 block->size = PAGE_SIZE - XV_ALIGN;
289 set_flag(block, BLOCK_FREE);
290 clear_flag(block, PREV_FREE);
291 set_blockprev(block, 0);
292
293 insert_block(pool, page, 0, block);
294
295 put_ptr_atomic(block);
296 spin_unlock(&pool->lock);
297
298 return 0;
299}
300
301/*
302 * Create a memory pool. Allocates freelist, bitmaps and other
303 * per-pool metadata.
304 */
305struct xv_pool *xv_create_pool(void)
306{
307 u32 ovhd_size;
308 struct xv_pool *pool;
309
310 ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
311 pool = kzalloc(ovhd_size, GFP_KERNEL);
312 if (!pool)
313 return NULL;
314
315 spin_lock_init(&pool->lock);
316
317 return pool;
318}
319EXPORT_SYMBOL_GPL(xv_create_pool);
320
321void xv_destroy_pool(struct xv_pool *pool)
322{
323 kfree(pool);
324}
325EXPORT_SYMBOL_GPL(xv_destroy_pool);
326
327/**
328 * xv_malloc - Allocate block of given size from pool.
329 * @pool: pool to allocate from
330 * @size: size of block to allocate
331 * @page: page no. that holds the object
332 * @offset: location of object within page
333 *
334 * On success, <page, offset> identifies block allocated
335 * and 0 is returned. On failure, <page, offset> is set to
336 * 0 and -ENOMEM is returned.
337 *
338 * Allocation requests with size > XV_MAX_ALLOC_SIZE will fail.
339 */
340int xv_malloc(struct xv_pool *pool, u32 size, struct page **page,
341 u32 *offset, gfp_t flags)
342{
343 int error;
344 u32 index, tmpsize, origsize, tmpoffset;
345 struct block_header *block, *tmpblock;
346
347 *page = NULL;
348 *offset = 0;
349 origsize = size;
350
351 if (unlikely(!size || size > XV_MAX_ALLOC_SIZE))
352 return -ENOMEM;
353
354 size = ALIGN(size, XV_ALIGN);
355
356 spin_lock(&pool->lock);
357
358 index = find_block(pool, size, page, offset);
359
360 if (!*page) {
361 spin_unlock(&pool->lock);
362 if (flags & GFP_NOWAIT)
363 return -ENOMEM;
364 error = grow_pool(pool, flags);
365 if (unlikely(error))
366 return error;
367
368 spin_lock(&pool->lock);
369 index = find_block(pool, size, page, offset);
370 }
371
372 if (!*page) {
373 spin_unlock(&pool->lock);
374 return -ENOMEM;
375 }
376
377 block = get_ptr_atomic(*page, *offset);
378
379 remove_block(pool, *page, *offset, block, index);
380
381 /* Split the block if required */
382 tmpoffset = *offset + size + XV_ALIGN;
383 tmpsize = block->size - size;
384 tmpblock = (struct block_header *)((char *)block + size + XV_ALIGN);
385 if (tmpsize) {
386 tmpblock->size = tmpsize - XV_ALIGN;
387 set_flag(tmpblock, BLOCK_FREE);
388 clear_flag(tmpblock, PREV_FREE);
389
390 set_blockprev(tmpblock, *offset);
391 if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
392 insert_block(pool, *page, tmpoffset, tmpblock);
393
394 if (tmpoffset + XV_ALIGN + tmpblock->size != PAGE_SIZE) {
395 tmpblock = BLOCK_NEXT(tmpblock);
396 set_blockprev(tmpblock, tmpoffset);
397 }
398 } else {
399 /* This block is exact fit */
400 if (tmpoffset != PAGE_SIZE)
401 clear_flag(tmpblock, PREV_FREE);
402 }
403
404 block->size = origsize;
405 clear_flag(block, BLOCK_FREE);
406
407 put_ptr_atomic(block);
408 spin_unlock(&pool->lock);
409
410 *offset += XV_ALIGN;
411
412 return 0;
413}
414EXPORT_SYMBOL_GPL(xv_malloc);
415
416/*
417 * Free block identified with <page, offset>
418 */
419void xv_free(struct xv_pool *pool, struct page *page, u32 offset)
420{
421 void *page_start;
422 struct block_header *block, *tmpblock;
423
424 offset -= XV_ALIGN;
425
426 spin_lock(&pool->lock);
427
428 page_start = get_ptr_atomic(page, 0);
429 block = (struct block_header *)((char *)page_start + offset);
430
431 /* Catch double free bugs */
432 BUG_ON(test_flag(block, BLOCK_FREE));
433
434 block->size = ALIGN(block->size, XV_ALIGN);
435
436 tmpblock = BLOCK_NEXT(block);
437 if (offset + block->size + XV_ALIGN == PAGE_SIZE)
438 tmpblock = NULL;
439
440 /* Merge next block if its free */
441 if (tmpblock && test_flag(tmpblock, BLOCK_FREE)) {
442 /*
443 * Blocks smaller than XV_MIN_ALLOC_SIZE
444 * are not inserted in any free list.
445 */
446 if (tmpblock->size >= XV_MIN_ALLOC_SIZE) {
447 remove_block(pool, page,
448 offset + block->size + XV_ALIGN, tmpblock,
449 get_index_for_insert(tmpblock->size));
450 }
451 block->size += tmpblock->size + XV_ALIGN;
452 }
453
454 /* Merge previous block if its free */
455 if (test_flag(block, PREV_FREE)) {
456 tmpblock = (struct block_header *)((char *)(page_start) +
457 get_blockprev(block));
458 offset = offset - tmpblock->size - XV_ALIGN;
459
460 if (tmpblock->size >= XV_MIN_ALLOC_SIZE)
461 remove_block(pool, page, offset, tmpblock,
462 get_index_for_insert(tmpblock->size));
463
464 tmpblock->size += block->size + XV_ALIGN;
465 block = tmpblock;
466 }
467
468 /* No used objects in this page. Free it. */
469 if (block->size == PAGE_SIZE - XV_ALIGN) {
470 put_ptr_atomic(page_start);
471 spin_unlock(&pool->lock);
472
473 __free_page(page);
474 stat_dec(&pool->total_pages);
475 return;
476 }
477
478 set_flag(block, BLOCK_FREE);
479 if (block->size >= XV_MIN_ALLOC_SIZE)
480 insert_block(pool, page, offset, block);
481
482 if (offset + block->size + XV_ALIGN != PAGE_SIZE) {
483 tmpblock = BLOCK_NEXT(block);
484 set_flag(tmpblock, PREV_FREE);
485 set_blockprev(tmpblock, offset);
486 }
487
488 put_ptr_atomic(page_start);
489 spin_unlock(&pool->lock);
490}
491EXPORT_SYMBOL_GPL(xv_free);
492
493u32 xv_get_object_size(void *obj)
494{
495 struct block_header *blk;
496
497 blk = (struct block_header *)((char *)(obj) - XV_ALIGN);
498 return blk->size;
499}
500EXPORT_SYMBOL_GPL(xv_get_object_size);
501
502/*
503 * Returns total memory used by allocator (userdata + metadata)
504 */
505u64 xv_get_total_size_bytes(struct xv_pool *pool)
506{
507 return pool->total_pages << PAGE_SHIFT;
508}
509EXPORT_SYMBOL_GPL(xv_get_total_size_bytes);