/*
 * Compressed RAM based swap device
 *
 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the license that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 * Project home: http://compcache.googlecode.com
 */

#define KMSG_COMPONENT "ramzswap"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/string.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/vmalloc.h>

#include "ramzswap_drv.h"

/* Globals */
static int ramzswap_major;
static struct ramzswap *devices;

/*
 * Pages that compress to a size larger than this are
 * forwarded to the backing swap, if present, or stored
 * uncompressed in memory otherwise.
 */
static unsigned int max_zpage_size;

/* Module params (documentation at end) */
static unsigned int num_devices;

static int rzs_test_flag(struct ramzswap *rzs, u32 index,
                        enum rzs_pageflags flag)
{
        return rzs->table[index].flags & BIT(flag);
}

static void rzs_set_flag(struct ramzswap *rzs, u32 index,
                        enum rzs_pageflags flag)
{
        rzs->table[index].flags |= BIT(flag);
}

static void rzs_clear_flag(struct ramzswap *rzs, u32 index,
                        enum rzs_pageflags flag)
{
        rzs->table[index].flags &= ~BIT(flag);
}

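/* Return 1 if the page at 'ptr' contains only zero bytes, 0 otherwise. */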
static int page_zero_filled(void *ptr)
{
        unsigned int pos;
        unsigned long *page;

        page = (unsigned long *)ptr;

        for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
                if (page[pos])
                        return 0;
        }

        return 1;
}

/*
 * memlimit cannot be greater than backing disk size.
 */
static void ramzswap_set_memlimit(struct ramzswap *rzs, size_t totalram_bytes)
{
        int memlimit_valid = 1;

        if (!rzs->memlimit) {
                pr_info("Memory limit not set.\n");
                memlimit_valid = 0;
        }

        if (rzs->memlimit > rzs->disksize) {
                pr_info("Memory limit cannot be greater than "
                        "disksize: limit=%zu, disksize=%zu\n",
                        rzs->memlimit, rzs->disksize);
                memlimit_valid = 0;
        }

        if (!memlimit_valid) {
                size_t mempart, disksize;
                pr_info("Using default: smaller of (%u%% of RAM) and "
                        "(backing disk size).\n",
                        default_memlimit_perc_ram);
                mempart = default_memlimit_perc_ram * (totalram_bytes / 100);
                disksize = rzs->disksize;
                rzs->memlimit = mempart > disksize ? disksize : mempart;
        }

        if (rzs->memlimit > totalram_bytes / 2) {
                pr_info(
                "It is not advisable to set the limit to more than half of "
                "memory size since we expect a 2:1 compression ratio. "
                "The limit represents the amount of *compressed* data we can "
                "keep in memory!\n"
                "\tMemory Size: %zu kB\n"
                "\tLimit you selected: %zu kB\n"
                "Continuing anyway ...\n",
                totalram_bytes >> 10, rzs->memlimit >> 10
                );
        }

        rzs->memlimit &= PAGE_MASK;
        BUG_ON(!rzs->memlimit);
}

static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
{
        if (!rzs->disksize) {
                pr_info(
                "disk size not provided. You can use disksize_kb module "
                "param to specify size.\nUsing default: (%u%% of RAM).\n",
                default_disksize_perc_ram
                );
                rzs->disksize = default_disksize_perc_ram *
                                        (totalram_bytes / 100);
        }

        if (rzs->disksize > 2 * (totalram_bytes)) {
                pr_info(
                "There is little point creating a ramzswap greater than "
                "twice the size of memory since we expect a 2:1 compression "
                "ratio. Note that ramzswap uses about 0.1%% of the size of "
                "the swap device when not in use so a huge ramzswap is "
                "wasteful.\n"
                "\tMemory Size: %zu kB\n"
                "\tSize you selected: %zu kB\n"
                "Continuing anyway ...\n",
                totalram_bytes >> 10, rzs->disksize >> 10
                );
        }

        rzs->disksize &= PAGE_MASK;
}

/*
 * The swap header (1st page of the swap device) contains information
 * to identify it as a swap partition. Prepare such a header
 * for the ramzswap device (ramzswap0) so that swapon can identify
 * it as a swap partition. In case a backing swap device is provided,
 * copy its swap header.
 */
static int setup_swap_header(struct ramzswap *rzs, union swap_header *s)
{
        int ret = 0;
        struct page *page;
        struct address_space *mapping;
        union swap_header *backing_swap_header;

        /*
         * There is no backing swap device. Create a swap header
         * that is acceptable to swapon.
         */
        if (!rzs->backing_swap) {
                s->info.version = 1;
                s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
                s->info.nr_badpages = 0;
                memcpy(s->magic.magic, "SWAPSPACE2", 10);
                return 0;
        }

        /*
         * We have a backing swap device. Copy its swap header
         * to the ramzswap device header. If this header contains
         * invalid information (backing device not a swap
         * partition, etc.), swapon will fail for ramzswap,
         * which is correct behavior - we don't want to swap
         * over a filesystem partition!
         */

        /* Read the backing swap header (code from sys_swapon) */
        mapping = rzs->swap_file->f_mapping;
        if (!mapping->a_ops->readpage) {
                ret = -EINVAL;
                goto out;
        }

        page = read_mapping_page(mapping, 0, rzs->swap_file);
        if (IS_ERR(page)) {
                ret = PTR_ERR(page);
                goto out;
        }

        backing_swap_header = kmap(page);
        memcpy(s, backing_swap_header, sizeof(*s));
        if (s->info.nr_badpages) {
                pr_info("Cannot use backing swap with bad pages (%u)\n",
                        s->info.nr_badpages);
                ret = -EINVAL;
        }
        /*
         * ramzswap disksize equals the number of usable pages in the backing
         * swap. Set last_page in the swap header to match this disksize
         * ('last_page' means 0-based index of the last usable swap page).
         */
        s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
        kunmap(page);

out:
        return ret;
}

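/*
 * Fill the userspace-visible stats structure for the RZSIO_GET_STATS
 * ioctl. Per-device counters are copied only when CONFIG_RAMZSWAP_STATS
 * is enabled.
 */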
static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
                        struct ramzswap_ioctl_stats *s)
{
        strncpy(s->backing_swap_name, rzs->backing_swap_name,
                MAX_SWAP_NAME_LEN - 1);
        s->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';

        s->disksize = rzs->disksize;
        s->memlimit = rzs->memlimit;

#if defined(CONFIG_RAMZSWAP_STATS)
        {
        struct ramzswap_stats *rs = &rzs->stats;
        size_t succ_writes, mem_used;
        unsigned int good_compress_perc = 0, no_compress_perc = 0;

        mem_used = xv_get_total_size_bytes(rzs->mem_pool)
                        + (rs->pages_expand << PAGE_SHIFT);
        succ_writes = rzs_stat64_read(rzs, &rs->num_writes) -
                        rzs_stat64_read(rzs, &rs->failed_writes);

        if (succ_writes && rs->pages_stored) {
                good_compress_perc = rs->good_compress * 100
                                        / rs->pages_stored;
                no_compress_perc = rs->pages_expand * 100
                                        / rs->pages_stored;
        }

        s->num_reads = rzs_stat64_read(rzs, &rs->num_reads);
        s->num_writes = rzs_stat64_read(rzs, &rs->num_writes);
        s->failed_reads = rzs_stat64_read(rzs, &rs->failed_reads);
        s->failed_writes = rzs_stat64_read(rzs, &rs->failed_writes);
        s->invalid_io = rzs_stat64_read(rzs, &rs->invalid_io);
        s->notify_free = rzs_stat64_read(rzs, &rs->notify_free);
        s->pages_zero = rs->pages_zero;

        s->good_compress_pct = good_compress_perc;
        s->pages_expand_pct = no_compress_perc;

        s->pages_stored = rs->pages_stored;
        s->pages_used = mem_used >> PAGE_SHIFT;
        s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
        s->compr_data_size = rs->compr_size;
        s->mem_used_total = mem_used;

        s->bdev_num_reads = rzs_stat64_read(rzs, &rs->bdev_num_reads);
        s->bdev_num_writes = rzs_stat64_read(rzs, &rs->bdev_num_writes);
        }
#endif /* CONFIG_RAMZSWAP_STATS */
}

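/*
 * Record one physically contiguous run of backing swap pages.
 * Extent records are packed into pages linked through page->lru;
 * a new page is allocated whenever the current one is full.
 */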
static int add_backing_swap_extent(struct ramzswap *rzs,
                                pgoff_t phy_pagenum,
                                pgoff_t num_pages)
{
        unsigned int idx;
        struct list_head *head;
        struct page *curr_page, *new_page;
        unsigned int extents_per_page = PAGE_SIZE /
                                sizeof(struct ramzswap_backing_extent);

        idx = rzs->num_extents % extents_per_page;
        if (!idx) {
                new_page = alloc_page(__GFP_ZERO);
                if (!new_page)
                        return -ENOMEM;

                if (rzs->num_extents) {
                        curr_page = virt_to_page(rzs->curr_extent);
                        head = &curr_page->lru;
                } else {
                        head = &rzs->backing_swap_extent_list;
                }

                list_add(&new_page->lru, head);
                rzs->curr_extent = page_address(new_page);
        }

        rzs->curr_extent->phy_pagenum = phy_pagenum;
        rzs->curr_extent->num_pages = num_pages;

        pr_debug("add_extent: idx=%u, phy_pgnum=%lu, num_pgs=%lu, "
                "pg_last=%lu, curr_ext=%p\n", idx, phy_pagenum, num_pages,
                phy_pagenum + num_pages - 1, rzs->curr_extent);

        if (idx != extents_per_page - 1)
                rzs->curr_extent++;

        return 0;
}

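/*
 * Walk the backing swap file with bmap() and build the list of
 * PAGE_SIZE aligned, physically contiguous extents. On success,
 * *num_pages is set to the number of usable pages found.
 */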
static int setup_backing_swap_extents(struct ramzswap *rzs,
                        struct inode *inode, unsigned long *num_pages)
{
        int ret = 0;
        unsigned blkbits;
        unsigned blocks_per_page;
        pgoff_t contig_pages = 0, total_pages = 0;
        pgoff_t pagenum = 0, prev_pagenum = 0;
        sector_t probe_block = 0;
        sector_t last_block;

        blkbits = inode->i_blkbits;
        blocks_per_page = PAGE_SIZE >> blkbits;

        last_block = i_size_read(inode) >> blkbits;
        while (probe_block + blocks_per_page <= last_block) {
                unsigned block_in_page;
                sector_t first_block;

                first_block = bmap(inode, probe_block);
                if (first_block == 0)
                        goto bad_bmap;

                /* It must be PAGE_SIZE aligned on-disk */
                if (first_block & (blocks_per_page - 1)) {
                        probe_block++;
                        goto probe_next;
                }

                /* All blocks within this page must be contiguous on disk */
                for (block_in_page = 1; block_in_page < blocks_per_page;
                                        block_in_page++) {
                        sector_t block;

                        block = bmap(inode, probe_block + block_in_page);
                        if (block == 0)
                                goto bad_bmap;
                        if (block != first_block + block_in_page) {
                                /* Discontiguity */
                                probe_block++;
                                goto probe_next;
                        }
                }

                /*
                 * We found a PAGE_SIZE length, PAGE_SIZE aligned
                 * run of blocks.
                 */
                pagenum = first_block >> (PAGE_SHIFT - blkbits);

                if (total_pages && (pagenum != prev_pagenum + 1)) {
                        ret = add_backing_swap_extent(rzs, prev_pagenum -
                                        (contig_pages - 1), contig_pages);
                        if (ret < 0)
                                goto out;
                        rzs->num_extents++;
                        contig_pages = 0;
                }
                total_pages++;
                contig_pages++;
                prev_pagenum = pagenum;
                probe_block += blocks_per_page;

probe_next:
                continue;
        }

        if (contig_pages) {
                pr_debug("adding last extent: pagenum=%lu, "
                        "contig_pages=%lu\n", pagenum, contig_pages);
                ret = add_backing_swap_extent(rzs,
                        prev_pagenum - (contig_pages - 1), contig_pages);
                if (ret < 0)
                        goto out;
                rzs->num_extents++;
        }
        if (!rzs->num_extents) {
                pr_err("No swap extents found!\n");
                ret = -EINVAL;
        }

        if (!ret) {
                *num_pages = total_pages;
                pr_info("Found %lu extents containing %luk\n",
                        rzs->num_extents, *num_pages << (PAGE_SHIFT - 10));
        }
        goto out;

bad_bmap:
        pr_err("Backing swapfile has holes\n");
        ret = -EINVAL;
out:
        while (ret && !list_empty(&rzs->backing_swap_extent_list)) {
                struct page *page;
                struct list_head *entry = rzs->backing_swap_extent_list.next;
                page = list_entry(entry, struct page, lru);
                list_del(entry);
                __free_page(page);
        }
        return ret;
}

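/*
 * Attach extent information to each table page (via page->mapping and
 * page->private) so that map_backing_swap_page() can translate logical
 * page numbers without scanning the whole extent list.
 */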
static void map_backing_swap_extents(struct ramzswap *rzs)
{
        struct ramzswap_backing_extent *se;
        struct page *table_page, *se_page;
        unsigned long num_pages, num_table_pages, entry;
        unsigned long se_idx, span;
        unsigned entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
        unsigned extents_per_page = PAGE_SIZE / sizeof(*se);

        /* True for block device */
        if (!rzs->num_extents)
                return;

        se_page = list_entry(rzs->backing_swap_extent_list.next,
                                struct page, lru);
        se = page_address(se_page);
        span = se->num_pages;
        num_pages = rzs->disksize >> PAGE_SHIFT;
        num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
                                                        PAGE_SIZE);

        entry = 0;
        se_idx = 0;
        while (num_table_pages--) {
                table_page = vmalloc_to_page(&rzs->table[entry]);
                while (span <= entry) {
                        se_idx++;
                        if (se_idx == rzs->num_extents)
                                BUG();

                        if (!(se_idx % extents_per_page)) {
                                se_page = list_entry(se_page->lru.next,
                                                struct page, lru);
                                se = page_address(se_page);
                        } else
                                se++;

                        span += se->num_pages;
                }
                table_page->mapping = (struct address_space *)se;
                table_page->private = se->num_pages - (span - entry);
                pr_debug("map_table: entry=%lu, span=%lu, map=%p, priv=%lu\n",
                        entry, span, table_page->mapping, table_page->private);
                entry += entries_per_page;
        }
}

/*
 * Check if the value of the backing_swap module param is sane.
 * Claim this device and set the ramzswap size equal to
 * the size of this block device.
 */
static int setup_backing_swap(struct ramzswap *rzs)
{
        int ret = 0;
        size_t disksize;
        unsigned long num_pages = 0;
        struct inode *inode;
        struct file *swap_file;
        struct address_space *mapping;
        struct block_device *bdev = NULL;

        if (!rzs->backing_swap_name[0]) {
                pr_debug("backing_swap param not given\n");
                goto out;
        }

        pr_info("Using backing swap device: %s\n", rzs->backing_swap_name);

        swap_file = filp_open(rzs->backing_swap_name,
                                O_RDWR | O_LARGEFILE, 0);
        if (IS_ERR(swap_file)) {
                pr_err("Error opening backing device: %s\n",
                        rzs->backing_swap_name);
                ret = -EINVAL;
                goto out;
        }

        mapping = swap_file->f_mapping;
        inode = mapping->host;

        if (S_ISBLK(inode->i_mode)) {
                bdev = I_BDEV(inode);
                ret = bd_claim(bdev, setup_backing_swap);
                if (ret < 0) {
                        bdev = NULL;
                        goto bad_param;
                }
                disksize = i_size_read(inode);
                /*
                 * Can happen if the user gives an extended partition as
                 * backing swap or simply a bad disk.
                 */
                if (!disksize) {
                        pr_err("Error reading backing swap size.\n");
                        goto bad_param;
                }
        } else if (S_ISREG(inode->i_mode)) {
                bdev = inode->i_sb->s_bdev;
                if (IS_SWAPFILE(inode)) {
                        ret = -EBUSY;
                        goto bad_param;
                }
                ret = setup_backing_swap_extents(rzs, inode, &num_pages);
                if (ret < 0)
                        goto bad_param;
                disksize = num_pages << PAGE_SHIFT;
        } else {
                goto bad_param;
        }

        rzs->swap_file = swap_file;
        rzs->backing_swap = bdev;
        rzs->disksize = disksize;

        return 0;

bad_param:
        if (bdev)
                bd_release(bdev);
        filp_close(swap_file, NULL);

out:
        rzs->backing_swap = NULL;
        return ret;
}

/*
 * Map logical page number 'pagenum' to the physical page number
 * on the backing swap device. For a block device, this is a nop.
 */
static u32 map_backing_swap_page(struct ramzswap *rzs, u32 pagenum)
{
        u32 skip_pages, entries_per_page;
        size_t delta, se_offset, skipped;
        struct page *table_page, *se_page;
        struct ramzswap_backing_extent *se;

        if (!rzs->num_extents)
                return pagenum;

        entries_per_page = PAGE_SIZE / sizeof(*rzs->table);

        table_page = vmalloc_to_page(&rzs->table[pagenum]);
        se = (struct ramzswap_backing_extent *)table_page->mapping;
        se_page = virt_to_page(se);

        skip_pages = pagenum - (pagenum / entries_per_page * entries_per_page);
        se_offset = table_page->private + skip_pages;

        if (se_offset < se->num_pages)
                return se->phy_pagenum + se_offset;

        skipped = se->num_pages - table_page->private;
        do {
                struct ramzswap_backing_extent *se_base;
                u32 se_entries_per_page = PAGE_SIZE / sizeof(*se);

                /* Get next swap extent */
                se_base = (struct ramzswap_backing_extent *)
                                page_address(se_page);
                if (se - se_base == se_entries_per_page - 1) {
                        se_page = list_entry(se_page->lru.next,
                                        struct page, lru);
                        se = page_address(se_page);
                } else {
                        se++;
                }

                skipped += se->num_pages;
        } while (skipped < skip_pages);

        delta = skipped - skip_pages;
        se_offset = se->num_pages - delta;

        return se->phy_pagenum + se_offset;
}

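/*
 * Free the memory associated with table entry 'index' (compressed
 * object, uncompressed page, or just the zero flag) and update stats.
 */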
static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
{
        u32 clen;
        void *obj;

        struct page *page = rzs->table[index].page;
        u32 offset = rzs->table[index].offset;

        if (unlikely(!page)) {
                /*
                 * No memory is allocated for zero filled pages.
                 * Simply clear the zero page flag.
                 */
                if (rzs_test_flag(rzs, index, RZS_ZERO)) {
                        rzs_clear_flag(rzs, index, RZS_ZERO);
                        rzs_stat_dec(&rzs->stats.pages_zero);
                }
                return;
        }

        if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
                clen = PAGE_SIZE;
                __free_page(page);
                rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
                rzs_stat_dec(&rzs->stats.pages_expand);
                goto out;
        }

        obj = kmap_atomic(page, KM_USER0) + offset;
        clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
        kunmap_atomic(obj, KM_USER0);

        xv_free(rzs->mem_pool, page, offset);
        if (clen <= PAGE_SIZE / 2)
                rzs_stat_dec(&rzs->stats.good_compress);

out:
        rzs->stats.compr_size -= clen;
        rzs_stat_dec(&rzs->stats.pages_stored);

        rzs->table[index].page = NULL;
        rzs->table[index].offset = 0;
}

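/* Complete a read of a zero-filled page by clearing the bio's page. */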
static int handle_zero_page(struct bio *bio)
{
        void *user_mem;
        struct page *page = bio->bi_io_vec[0].bv_page;

        user_mem = kmap_atomic(page, KM_USER0);
        memset(user_mem, 0, PAGE_SIZE);
        kunmap_atomic(user_mem, KM_USER0);

        flush_dcache_page(page);

        set_bit(BIO_UPTODATE, &bio->bi_flags);
        bio_endio(bio, 0);
        return 0;
}

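/* Complete a read of a page that was stored uncompressed. */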
static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio)
{
        u32 index;
        struct page *page;
        unsigned char *user_mem, *cmem;

        page = bio->bi_io_vec[0].bv_page;
        index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

        user_mem = kmap_atomic(page, KM_USER0);
        cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
                        rzs->table[index].offset;

        memcpy(user_mem, cmem, PAGE_SIZE);
        kunmap_atomic(user_mem, KM_USER0);
        kunmap_atomic(cmem, KM_USER1);

        flush_dcache_page(page);

        set_bit(BIO_UPTODATE, &bio->bi_flags);
        bio_endio(bio, 0);
        return 0;
}

/*
 * Called when the requested page is not present in ramzswap.
 * It is either on the backing swap device (if present), or this
 * is an attempt to read before any previous write to this
 * location - this happens due to readahead when the swap device
 * is read from user space (e.g. during swapon).
 */
static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio)
{
        /*
         * Always forward such requests to the backing swap
         * device (if present).
         */
        if (rzs->backing_swap) {
                u32 pagenum;
                rzs_stat64_dec(rzs, &rzs->stats.num_reads);
                rzs_stat64_inc(rzs, &rzs->stats.bdev_num_reads);
                bio->bi_bdev = rzs->backing_swap;

                /*
                 * In case backing swap is a file, find the right offset
                 * within the file corresponding to logical position 'index'.
                 * For a block device, this is a nop.
                 */
                pagenum = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
                bio->bi_sector = map_backing_swap_page(rzs, pagenum)
                                        << SECTORS_PER_PAGE_SHIFT;
                return 1;
        }

        /*
         * This is an unlikely event in case the backing dev is
         * not present.
         */
        pr_debug("Read before write on swap device: "
                "sector=%lu, size=%u, offset=%u\n",
                (ulong)(bio->bi_sector), bio->bi_size,
                bio->bi_io_vec[0].bv_offset);

        /* Do nothing. Just return success */
        set_bit(BIO_UPTODATE, &bio->bi_flags);
        bio_endio(bio, 0);
        return 0;
}

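/*
 * Handle a page read: zero pages, uncompressed pages and faults are
 * dispatched to their handlers; otherwise the stored object is
 * LZO-decompressed into the bio's page.
 */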
static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
{
        int ret;
        u32 index;
        size_t clen;
        struct page *page;
        struct zobj_header *zheader;
        unsigned char *user_mem, *cmem;

        rzs_stat64_inc(rzs, &rzs->stats.num_reads);

        page = bio->bi_io_vec[0].bv_page;
        index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

        if (rzs_test_flag(rzs, index, RZS_ZERO))
                return handle_zero_page(bio);

        /* Requested page is not present in compressed area */
        if (!rzs->table[index].page)
                return handle_ramzswap_fault(rzs, bio);

        /* Page is stored uncompressed since it's incompressible */
        if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
                return handle_uncompressed_page(rzs, bio);

        user_mem = kmap_atomic(page, KM_USER0);
        clen = PAGE_SIZE;

        cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
                        rzs->table[index].offset;

        ret = lzo1x_decompress_safe(
                cmem + sizeof(*zheader),
                xv_get_object_size(cmem) - sizeof(*zheader),
                user_mem, &clen);

        kunmap_atomic(user_mem, KM_USER0);
        kunmap_atomic(cmem, KM_USER1);

        /* should NEVER happen */
        if (unlikely(ret != LZO_E_OK)) {
                pr_err("Decompression failed! err=%d, page=%u\n",
                        ret, index);
                rzs_stat64_inc(rzs, &rzs->stats.failed_reads);
                goto out;
        }

        flush_dcache_page(page);

        set_bit(BIO_UPTODATE, &bio->bi_flags);
        bio_endio(bio, 0);
        return 0;

out:
        bio_io_error(bio);
        return 0;
}

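/*
 * Handle a page write: zero-filled pages only get a flag, compressible
 * pages are stored via the xvmalloc pool, and incompressible pages are
 * either stored as-is or forwarded to the backing swap device.
 */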
static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
{
        int ret, fwd_write_request = 0;
        u32 offset, index;
        size_t clen;
        struct zobj_header *zheader;
        struct page *page, *page_store;
        unsigned char *user_mem, *cmem, *src;

        rzs_stat64_inc(rzs, &rzs->stats.num_writes);

        page = bio->bi_io_vec[0].bv_page;
        index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

        src = rzs->compress_buffer;

        /*
         * The system swaps to the same sector again when the stored page
         * is no longer referenced by any process. So, it is now safe
         * to free the memory that was allocated for this page.
         */
        if (rzs->table[index].page || rzs_test_flag(rzs, index, RZS_ZERO))
                ramzswap_free_page(rzs, index);

        mutex_lock(&rzs->lock);

        user_mem = kmap_atomic(page, KM_USER0);
        if (page_zero_filled(user_mem)) {
                kunmap_atomic(user_mem, KM_USER0);
                mutex_unlock(&rzs->lock);
                rzs_stat_inc(&rzs->stats.pages_zero);
                rzs_set_flag(rzs, index, RZS_ZERO);

                set_bit(BIO_UPTODATE, &bio->bi_flags);
                bio_endio(bio, 0);
                return 0;
        }

        if (rzs->backing_swap &&
                (rzs->stats.compr_size > rzs->memlimit - PAGE_SIZE)) {
                kunmap_atomic(user_mem, KM_USER0);
                mutex_unlock(&rzs->lock);
                fwd_write_request = 1;
                goto out;
        }

        ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
                                rzs->compress_workmem);

        kunmap_atomic(user_mem, KM_USER0);

        if (unlikely(ret != LZO_E_OK)) {
                mutex_unlock(&rzs->lock);
                pr_err("Compression failed! err=%d\n", ret);
                rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
                goto out;
        }

        /*
         * Page is incompressible. Forward it to the backing swap
         * if present. Otherwise, store it as-is (uncompressed)
         * since we do not want to return too many swap write
         * errors, which have the side effect of hanging the system.
         */
        if (unlikely(clen > max_zpage_size)) {
                if (rzs->backing_swap) {
                        mutex_unlock(&rzs->lock);
                        fwd_write_request = 1;
                        goto out;
                }

                clen = PAGE_SIZE;
                page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
                if (unlikely(!page_store)) {
                        mutex_unlock(&rzs->lock);
                        pr_info("Error allocating memory for incompressible "
                                "page: %u\n", index);
                        rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
                        goto out;
                }

                offset = 0;
                rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
                rzs_stat_inc(&rzs->stats.pages_expand);
                rzs->table[index].page = page_store;
                src = kmap_atomic(page, KM_USER0);
                goto memstore;
        }

        if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
                        &rzs->table[index].page, &offset,
                        GFP_NOIO | __GFP_HIGHMEM)) {
                mutex_unlock(&rzs->lock);
                pr_info("Error allocating memory for compressed "
                        "page: %u, size=%zu\n", index, clen);
                rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
                if (rzs->backing_swap)
                        fwd_write_request = 1;
                goto out;
        }

memstore:
        rzs->table[index].offset = offset;

        cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
                        rzs->table[index].offset;

#if 0
        /* Back-reference needed for memory defragmentation */
        if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
                zheader = (struct zobj_header *)cmem;
                zheader->table_idx = index;
                cmem += sizeof(*zheader);
        }
#endif

        memcpy(cmem, src, clen);

        kunmap_atomic(cmem, KM_USER1);
        if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
                kunmap_atomic(src, KM_USER0);

        /* Update stats */
        rzs->stats.compr_size += clen;
        rzs_stat_inc(&rzs->stats.pages_stored);
        if (clen <= PAGE_SIZE / 2)
                rzs_stat_inc(&rzs->stats.good_compress);

        mutex_unlock(&rzs->lock);

        set_bit(BIO_UPTODATE, &bio->bi_flags);
        bio_endio(bio, 0);
        return 0;

out:
        if (fwd_write_request) {
                rzs_stat64_inc(rzs, &rzs->stats.bdev_num_writes);
                bio->bi_bdev = rzs->backing_swap;
#if 0
                /*
                 * TODO: We currently have a linear mapping of ramzswap and
                 * backing swap sectors. This is not desired since we want
                 * to optimize writes to backing swap to minimize disk seeks
                 * or have effective wear leveling (for SSDs). Also, a
                 * non-linear mapping is required to implement compressed
                 * on-disk swapping.
                 */
                bio->bi_sector = get_backing_swap_page()
                                        << SECTORS_PER_PAGE_SHIFT;
#endif
                /*
                 * In case backing swap is a file, find the right offset
                 * within the file corresponding to logical position 'index'.
                 * For a block device, this is a nop.
                 */
                bio->bi_sector = map_backing_swap_page(rzs, index)
                                        << SECTORS_PER_PAGE_SHIFT;
                return 1;
        }

        bio_io_error(bio);
        return 0;
}

/*
 * Check if request is within bounds and page aligned.
 */
static inline int valid_swap_request(struct ramzswap *rzs, struct bio *bio)
{
        if (unlikely(
                (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) ||
                (bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
                (bio->bi_vcnt != 1) ||
                (bio->bi_size != PAGE_SIZE) ||
                (bio->bi_io_vec[0].bv_offset != 0))) {

                return 0;
        }

        /* swap request is valid */
        return 1;
}

/*
 * Handler function for all ramzswap I/O requests.
 */
static int ramzswap_make_request(struct request_queue *queue, struct bio *bio)
{
        int ret = 0;
        struct ramzswap *rzs = queue->queuedata;

        if (unlikely(!rzs->init_done)) {
                bio_io_error(bio);
                return 0;
        }

        if (!valid_swap_request(rzs, bio)) {
                rzs_stat64_inc(rzs, &rzs->stats.invalid_io);
                bio_io_error(bio);
                return 0;
        }

        switch (bio_data_dir(bio)) {
        case READ:
                ret = ramzswap_read(rzs, bio);
                break;

        case WRITE:
                ret = ramzswap_write(rzs, bio);
                break;
        }

        return ret;
}

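/*
 * Free all per-device resources (buffers, stored pages, table, extent
 * list, backing swap) and return the device to its uninitialized state.
 */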
static void reset_device(struct ramzswap *rzs)
{
        int is_backing_blkdev = 0;
        size_t index, num_pages;
        unsigned entries_per_page;
        unsigned long num_table_pages, entry = 0;

        /* Do not accept any new I/O request */
        rzs->init_done = 0;

        if (rzs->backing_swap && !rzs->num_extents)
                is_backing_blkdev = 1;

        num_pages = rzs->disksize >> PAGE_SHIFT;

        /* Free various per-device buffers */
        kfree(rzs->compress_workmem);
        free_pages((unsigned long)rzs->compress_buffer, 1);

        rzs->compress_workmem = NULL;
        rzs->compress_buffer = NULL;

        /* Free all pages that are still in this ramzswap device */
        for (index = 0; index < num_pages; index++) {
                struct page *page;
                u16 offset;

                page = rzs->table[index].page;
                offset = rzs->table[index].offset;

                if (!page)
                        continue;

                if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
                        __free_page(page);
                else
                        xv_free(rzs->mem_pool, page, offset);
        }

        entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
        num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
                                        PAGE_SIZE);
        /*
         * Set page->mapping to NULL for every table page.
         * Otherwise, we will hit bad_page() during free.
         */
        while (rzs->num_extents && num_table_pages--) {
                struct page *page;
                page = vmalloc_to_page(&rzs->table[entry]);
                page->mapping = NULL;
                entry += entries_per_page;
        }
        vfree(rzs->table);
        rzs->table = NULL;

        xv_destroy_pool(rzs->mem_pool);
        rzs->mem_pool = NULL;

        /* Free all swap extent pages */
        while (!list_empty(&rzs->backing_swap_extent_list)) {
                struct page *page;
                struct list_head *entry;
                entry = rzs->backing_swap_extent_list.next;
                page = list_entry(entry, struct page, lru);
                list_del(entry);
                __free_page(page);
        }
        INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
        rzs->num_extents = 0;

        /* Close backing swap device, if present */
        if (rzs->backing_swap) {
                if (is_backing_blkdev)
                        bd_release(rzs->backing_swap);
                filp_close(rzs->swap_file, NULL);
                rzs->backing_swap = NULL;
                memset(rzs->backing_swap_name, 0, MAX_SWAP_NAME_LEN);
        }

        /* Reset stats */
        memset(&rzs->stats, 0, sizeof(rzs->stats));

        rzs->disksize = 0;
        rzs->memlimit = 0;
}

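/*
 * Handle RZSIO_INIT: set up the backing swap (if any), allocate the
 * compression buffers, table and memory pool, and write the swap header
 * so that swapon accepts the device.
 */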
static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
{
        int ret;
        size_t num_pages;
        struct page *page;
        union swap_header *swap_header;

        if (rzs->init_done) {
                pr_info("Device already initialized!\n");
                return -EBUSY;
        }

        ret = setup_backing_swap(rzs);
        if (ret)
                goto fail;

        if (rzs->backing_swap)
                ramzswap_set_memlimit(rzs, totalram_pages << PAGE_SHIFT);
        else
                ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);

        rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
        if (!rzs->compress_workmem) {
                pr_err("Error allocating compressor working memory!\n");
                ret = -ENOMEM;
                goto fail;
        }

        rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1);
        if (!rzs->compress_buffer) {
                pr_err("Error allocating compressor buffer space\n");
                ret = -ENOMEM;
                goto fail;
        }

        num_pages = rzs->disksize >> PAGE_SHIFT;
        rzs->table = vmalloc(num_pages * sizeof(*rzs->table));
        if (!rzs->table) {
                pr_err("Error allocating ramzswap address table\n");
                /* To prevent accessing table entries during cleanup */
                rzs->disksize = 0;
                ret = -ENOMEM;
                goto fail;
        }
        memset(rzs->table, 0, num_pages * sizeof(*rzs->table));

        map_backing_swap_extents(rzs);

        page = alloc_page(__GFP_ZERO);
        if (!page) {
                pr_err("Error allocating swap header page\n");
                ret = -ENOMEM;
                goto fail;
        }
        rzs->table[0].page = page;
        rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED);

        swap_header = kmap(page);
        ret = setup_swap_header(rzs, swap_header);
        kunmap(page);
        if (ret) {
                pr_err("Error setting swap header\n");
                goto fail;
        }

        set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);

        /*
         * We have an identity mapping of sectors between ramzswap and
         * the backing swap device, so this queue flag should be set
         * according to the backing dev.
         */
        if (!rzs->backing_swap ||
                        blk_queue_nonrot(rzs->backing_swap->bd_disk->queue))
                queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);

        rzs->mem_pool = xv_create_pool();
        if (!rzs->mem_pool) {
                pr_err("Error creating memory pool\n");
                ret = -ENOMEM;
                goto fail;
        }

        /*
         * Pages that compress to a size greater than this are forwarded
         * to the physical swap disk (if a backing dev is provided).
         * TODO: make this configurable
         */
        if (rzs->backing_swap)
                max_zpage_size = max_zpage_size_bdev;
        else
                max_zpage_size = max_zpage_size_nobdev;
        pr_debug("Max compressed page size: %u bytes\n", max_zpage_size);

        rzs->init_done = 1;

        pr_debug("Initialization done!\n");
        return 0;

fail:
        reset_device(rzs);

        pr_err("Initialization failed: err=%d\n", ret);
        return ret;
}

static int ramzswap_ioctl_reset_device(struct ramzswap *rzs)
{
        if (rzs->init_done)
                reset_device(rzs);

        return 0;
}

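/* Dispatch the ramzswap control ioctls issued on the block device. */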
static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned int cmd, unsigned long arg)
{
        int ret = 0;
        size_t disksize_kb, memlimit_kb;

        struct ramzswap *rzs = bdev->bd_disk->private_data;

        switch (cmd) {
        case RZSIO_SET_DISKSIZE_KB:
                if (rzs->init_done) {
                        ret = -EBUSY;
                        goto out;
                }
                if (copy_from_user(&disksize_kb, (void *)arg,
                                                _IOC_SIZE(cmd))) {
                        ret = -EFAULT;
                        goto out;
                }
                rzs->disksize = disksize_kb << 10;
                pr_info("Disk size set to %zu kB\n", disksize_kb);
                break;

        case RZSIO_SET_MEMLIMIT_KB:
                if (rzs->init_done) {
                        /* TODO: allow changing memlimit */
                        ret = -EBUSY;
                        goto out;
                }
                if (copy_from_user(&memlimit_kb, (void *)arg,
                                                _IOC_SIZE(cmd))) {
                        ret = -EFAULT;
                        goto out;
                }
                rzs->memlimit = memlimit_kb << 10;
                pr_info("Memory limit set to %zu kB\n", memlimit_kb);
                break;

        case RZSIO_SET_BACKING_SWAP:
                if (rzs->init_done) {
                        ret = -EBUSY;
                        goto out;
                }

                if (copy_from_user(&rzs->backing_swap_name, (void *)arg,
                                                _IOC_SIZE(cmd))) {
                        ret = -EFAULT;
                        goto out;
                }
                rzs->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
                pr_info("Backing swap set to %s\n", rzs->backing_swap_name);
                break;

        case RZSIO_GET_STATS:
        {
                struct ramzswap_ioctl_stats *stats;
                if (!rzs->init_done) {
                        ret = -ENOTTY;
                        goto out;
                }
                stats = kzalloc(sizeof(*stats), GFP_KERNEL);
                if (!stats) {
                        ret = -ENOMEM;
                        goto out;
                }
                ramzswap_ioctl_get_stats(rzs, stats);
                if (copy_to_user((void *)arg, stats, sizeof(*stats))) {
                        kfree(stats);
                        ret = -EFAULT;
                        goto out;
                }
                kfree(stats);
                break;
        }
        case RZSIO_INIT:
                ret = ramzswap_ioctl_init_device(rzs);
                break;

        case RZSIO_RESET:
                /* Do not reset an active device! */
                if (bdev->bd_holders) {
                        ret = -EBUSY;
                        goto out;
                }

                /* Make sure all pending I/O is finished */
                if (bdev)
                        fsync_bdev(bdev);

                ret = ramzswap_ioctl_reset_device(rzs);
                break;

        default:
                pr_info("Invalid ioctl %u\n", cmd);
                ret = -ENOTTY;
        }

out:
        return ret;
}

static struct block_device_operations ramzswap_devops = {
        .ioctl = ramzswap_ioctl,
        .owner = THIS_MODULE,
};

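/* Allocate and register the request queue and gendisk for one device. */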
static int create_device(struct ramzswap *rzs, int device_id)
{
        int ret = 0;

        mutex_init(&rzs->lock);
        spin_lock_init(&rzs->stat64_lock);
        INIT_LIST_HEAD(&rzs->backing_swap_extent_list);

        rzs->queue = blk_alloc_queue(GFP_KERNEL);
        if (!rzs->queue) {
                pr_err("Error allocating disk queue for device %d\n",
                        device_id);
                ret = -ENOMEM;
                goto out;
        }

        blk_queue_make_request(rzs->queue, ramzswap_make_request);
        rzs->queue->queuedata = rzs;

        /* gendisk structure */
        rzs->disk = alloc_disk(1);
        if (!rzs->disk) {
                blk_cleanup_queue(rzs->queue);
                pr_warning("Error allocating disk structure for device %d\n",
                        device_id);
                ret = -ENOMEM;
                goto out;
        }

        rzs->disk->major = ramzswap_major;
        rzs->disk->first_minor = device_id;
        rzs->disk->fops = &ramzswap_devops;
        rzs->disk->queue = rzs->queue;
        rzs->disk->private_data = rzs;
        snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);

        /*
         * Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl
         * or set equal to backing swap device (if provided)
         */
        set_capacity(rzs->disk, 0);

        blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
        blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE);

        add_disk(rzs->disk);

        rzs->init_done = 0;

out:
        return ret;
}

static void destroy_device(struct ramzswap *rzs)
{
        if (rzs->disk) {
                del_gendisk(rzs->disk);
                put_disk(rzs->disk);
        }

        if (rzs->queue)
                blk_cleanup_queue(rzs->queue);
}

static int __init ramzswap_init(void)
{
        int ret, dev_id;

        if (num_devices > max_num_devices) {
                pr_warning("Invalid value for num_devices: %u\n",
                                num_devices);
                ret = -EINVAL;
                goto out;
        }

        ramzswap_major = register_blkdev(0, "ramzswap");
        if (ramzswap_major <= 0) {
                pr_warning("Unable to get major number\n");
                ret = -EBUSY;
                goto out;
        }

        if (!num_devices) {
                pr_info("num_devices not specified. Using default: 1\n");
                num_devices = 1;
        }

        /* Allocate the device array and initialize each one */
        pr_info("Creating %u devices ...\n", num_devices);
        devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL);
        if (!devices) {
                ret = -ENOMEM;
                goto unregister;
        }

        for (dev_id = 0; dev_id < num_devices; dev_id++) {
                ret = create_device(&devices[dev_id], dev_id);
                if (ret)
                        goto free_devices;
        }

        return 0;

free_devices:
        while (dev_id)
                destroy_device(&devices[--dev_id]);
unregister:
        unregister_blkdev(ramzswap_major, "ramzswap");
out:
        return ret;
}

static void __exit ramzswap_exit(void)
{
        int i;
        struct ramzswap *rzs;

        for (i = 0; i < num_devices; i++) {
                rzs = &devices[i];

                destroy_device(rzs);
                if (rzs->init_done)
                        reset_device(rzs);
        }

        unregister_blkdev(ramzswap_major, "ramzswap");

        kfree(devices);
        pr_debug("Cleanup done!\n");
}

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");

module_init(ramzswap_init);
module_exit(ramzswap_exit);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Based Swap Device");