blob: 5e422e254ee8931e561cce301bfc2654ce958bba [file] [log] [blame]
Nitin Gupta306b0c92009-09-22 10:26:53 +05301/*
2 * Compressed RAM based swap device
3 *
Nitin Gupta1130ebb2010-01-28 21:21:35 +05304 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
Nitin Gupta306b0c92009-09-22 10:26:53 +05305 *
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
8 *
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
11 *
12 * Project home: http://compcache.googlecode.com
13 */
14
15#define KMSG_COMPONENT "ramzswap"
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#include <linux/module.h>
19#include <linux/kernel.h>
20#include <linux/bitops.h>
21#include <linux/blkdev.h>
22#include <linux/buffer_head.h>
23#include <linux/device.h>
24#include <linux/genhd.h>
25#include <linux/highmem.h>
26#include <linux/lzo.h>
Nitin Gupta306b0c92009-09-22 10:26:53 +053027#include <linux/string.h>
28#include <linux/swap.h>
29#include <linux/swapops.h>
30#include <linux/vmalloc.h>
Nitin Gupta306b0c92009-09-22 10:26:53 +053031
32#include "ramzswap_drv.h"
33
/* Globals */
/* NOTE(review): presumably the registered block-device major; registration is not visible in this chunk. */
static int ramzswap_major;
/* NOTE(review): presumably the array of per-device state (num_devices entries) -- confirm at allocation site. */
static struct ramzswap *devices;

/*
 * Pages that compress to larger than this size are
 * forwarded to backing swap, if present, or stored
 * uncompressed in memory otherwise.
 */
static unsigned int max_zpage_size;

/* Module params (documentation at end) */
static unsigned int num_devices;
47
48static int rzs_test_flag(struct ramzswap *rzs, u32 index,
49 enum rzs_pageflags flag)
50{
51 return rzs->table[index].flags & BIT(flag);
52}
53
54static void rzs_set_flag(struct ramzswap *rzs, u32 index,
55 enum rzs_pageflags flag)
56{
57 rzs->table[index].flags |= BIT(flag);
58}
59
60static void rzs_clear_flag(struct ramzswap *rzs, u32 index,
61 enum rzs_pageflags flag)
62{
63 rzs->table[index].flags &= ~BIT(flag);
64}
65
66static int page_zero_filled(void *ptr)
67{
68 unsigned int pos;
69 unsigned long *page;
70
71 page = (unsigned long *)ptr;
72
73 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
74 if (page[pos])
75 return 0;
76 }
77
78 return 1;
79}
80
81/*
82 * memlimit cannot be greater than backing disk size.
83 */
84static void ramzswap_set_memlimit(struct ramzswap *rzs, size_t totalram_bytes)
85{
86 int memlimit_valid = 1;
87
88 if (!rzs->memlimit) {
89 pr_info("Memory limit not set.\n");
90 memlimit_valid = 0;
91 }
92
93 if (rzs->memlimit > rzs->disksize) {
94 pr_info("Memory limit cannot be greater than "
95 "disksize: limit=%zu, disksize=%zu\n",
96 rzs->memlimit, rzs->disksize);
97 memlimit_valid = 0;
98 }
99
100 if (!memlimit_valid) {
101 size_t mempart, disksize;
102 pr_info("Using default: smaller of (%u%% of RAM) and "
103 "(backing disk size).\n",
104 default_memlimit_perc_ram);
105 mempart = default_memlimit_perc_ram * (totalram_bytes / 100);
106 disksize = rzs->disksize;
107 rzs->memlimit = mempart > disksize ? disksize : mempart;
108 }
109
110 if (rzs->memlimit > totalram_bytes / 2) {
111 pr_info(
112 "Its not advisable setting limit more than half of "
113 "size of memory since we expect a 2:1 compression ratio. "
114 "Limit represents amount of *compressed* data we can keep "
115 "in memory!\n"
116 "\tMemory Size: %zu kB\n"
117 "\tLimit you selected: %zu kB\n"
118 "Continuing anyway ...\n",
119 totalram_bytes >> 10, rzs->memlimit >> 10
120 );
121 }
122
123 rzs->memlimit &= PAGE_MASK;
124 BUG_ON(!rzs->memlimit);
125}
126
127static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
128{
129 if (!rzs->disksize) {
130 pr_info(
131 "disk size not provided. You can use disksize_kb module "
132 "param to specify size.\nUsing default: (%u%% of RAM).\n",
133 default_disksize_perc_ram
134 );
135 rzs->disksize = default_disksize_perc_ram *
136 (totalram_bytes / 100);
137 }
138
139 if (rzs->disksize > 2 * (totalram_bytes)) {
140 pr_info(
141 "There is little point creating a ramzswap of greater than "
142 "twice the size of memory since we expect a 2:1 compression "
143 "ratio. Note that ramzswap uses about 0.1%% of the size of "
144 "the swap device when not in use so a huge ramzswap is "
145 "wasteful.\n"
146 "\tMemory Size: %zu kB\n"
147 "\tSize you selected: %zu kB\n"
148 "Continuing anyway ...\n",
149 totalram_bytes >> 10, rzs->disksize
150 );
151 }
152
153 rzs->disksize &= PAGE_MASK;
154}
155
156/*
157 * Swap header (1st page of swap device) contains information
158 * to indentify it as a swap partition. Prepare such a header
159 * for ramzswap device (ramzswap0) so that swapon can identify
160 * it as swap partition. In case backing swap device is provided,
161 * copy its swap header.
162 */
163static int setup_swap_header(struct ramzswap *rzs, union swap_header *s)
164{
165 int ret = 0;
166 struct page *page;
167 struct address_space *mapping;
168 union swap_header *backing_swap_header;
169
170 /*
171 * There is no backing swap device. Create a swap header
172 * that is acceptable by swapon.
173 */
174 if (!rzs->backing_swap) {
175 s->info.version = 1;
176 s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
177 s->info.nr_badpages = 0;
178 memcpy(s->magic.magic, "SWAPSPACE2", 10);
179 return 0;
180 }
181
182 /*
183 * We have a backing swap device. Copy its swap header
184 * to ramzswap device header. If this header contains
185 * invalid information (backing device not a swap
186 * partition, etc.), swapon will fail for ramzswap
187 * which is correct behavior - we don't want to swap
188 * over filesystem partition!
189 */
190
191 /* Read the backing swap header (code from sys_swapon) */
192 mapping = rzs->swap_file->f_mapping;
193 if (!mapping->a_ops->readpage) {
194 ret = -EINVAL;
195 goto out;
196 }
197
198 page = read_mapping_page(mapping, 0, rzs->swap_file);
199 if (IS_ERR(page)) {
200 ret = PTR_ERR(page);
201 goto out;
202 }
203
204 backing_swap_header = kmap(page);
205 memcpy(s, backing_swap_header, sizeof(*s));
206 if (s->info.nr_badpages) {
207 pr_info("Cannot use backing swap with bad pages (%u)\n",
208 s->info.nr_badpages);
209 ret = -EINVAL;
210 }
211 /*
212 * ramzswap disksize equals number of usable pages in backing
213 * swap. Set last_page in swap header to match this disksize
214 * ('last_page' means 0-based index of last usable swap page).
215 */
216 s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
217 kunmap(page);
218
219out:
220 return ret;
221}
222
Nitin Guptac25d75a2010-01-28 21:19:59 +0530223static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
Nitin Gupta306b0c92009-09-22 10:26:53 +0530224 struct ramzswap_ioctl_stats *s)
225{
226 strncpy(s->backing_swap_name, rzs->backing_swap_name,
227 MAX_SWAP_NAME_LEN - 1);
228 s->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
229
230 s->disksize = rzs->disksize;
231 s->memlimit = rzs->memlimit;
232
233#if defined(CONFIG_RAMZSWAP_STATS)
234 {
235 struct ramzswap_stats *rs = &rzs->stats;
236 size_t succ_writes, mem_used;
237 unsigned int good_compress_perc = 0, no_compress_perc = 0;
238
239 mem_used = xv_get_total_size_bytes(rzs->mem_pool)
240 + (rs->pages_expand << PAGE_SHIFT);
Nitin Gupta6a907722010-01-28 21:13:37 +0530241 succ_writes = rzs_stat64_read(rzs, &rs->num_writes) -
242 rzs_stat64_read(rzs, &rs->failed_writes);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530243
244 if (succ_writes && rs->pages_stored) {
245 good_compress_perc = rs->good_compress * 100
246 / rs->pages_stored;
247 no_compress_perc = rs->pages_expand * 100
248 / rs->pages_stored;
249 }
250
Nitin Gupta6a907722010-01-28 21:13:37 +0530251 s->num_reads = rzs_stat64_read(rzs, &rs->num_reads);
252 s->num_writes = rzs_stat64_read(rzs, &rs->num_writes);
253 s->failed_reads = rzs_stat64_read(rzs, &rs->failed_reads);
254 s->failed_writes = rzs_stat64_read(rzs, &rs->failed_writes);
255 s->invalid_io = rzs_stat64_read(rzs, &rs->invalid_io);
256 s->notify_free = rzs_stat64_read(rzs, &rs->notify_free);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530257 s->pages_zero = rs->pages_zero;
258
259 s->good_compress_pct = good_compress_perc;
260 s->pages_expand_pct = no_compress_perc;
261
262 s->pages_stored = rs->pages_stored;
263 s->pages_used = mem_used >> PAGE_SHIFT;
264 s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
265 s->compr_data_size = rs->compr_size;
266 s->mem_used_total = mem_used;
267
Nitin Gupta6a907722010-01-28 21:13:37 +0530268 s->bdev_num_reads = rzs_stat64_read(rzs, &rs->bdev_num_reads);
269 s->bdev_num_writes = rzs_stat64_read(rzs, &rs->bdev_num_writes);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530270 }
271#endif /* CONFIG_RAMZSWAP_STATS */
272}
273
274static int add_backing_swap_extent(struct ramzswap *rzs,
275 pgoff_t phy_pagenum,
276 pgoff_t num_pages)
277{
278 unsigned int idx;
279 struct list_head *head;
280 struct page *curr_page, *new_page;
281 unsigned int extents_per_page = PAGE_SIZE /
282 sizeof(struct ramzswap_backing_extent);
283
284 idx = rzs->num_extents % extents_per_page;
285 if (!idx) {
286 new_page = alloc_page(__GFP_ZERO);
287 if (!new_page)
288 return -ENOMEM;
289
290 if (rzs->num_extents) {
291 curr_page = virt_to_page(rzs->curr_extent);
292 head = &curr_page->lru;
293 } else {
294 head = &rzs->backing_swap_extent_list;
295 }
296
297 list_add(&new_page->lru, head);
298 rzs->curr_extent = page_address(new_page);
299 }
300
301 rzs->curr_extent->phy_pagenum = phy_pagenum;
302 rzs->curr_extent->num_pages = num_pages;
303
304 pr_debug("add_extent: idx=%u, phy_pgnum=%lu, num_pgs=%lu, "
305 "pg_last=%lu, curr_ext=%p\n", idx, phy_pagenum, num_pages,
306 phy_pagenum + num_pages - 1, rzs->curr_extent);
307
308 if (idx != extents_per_page - 1)
309 rzs->curr_extent++;
310
311 return 0;
312}
313
314static int setup_backing_swap_extents(struct ramzswap *rzs,
315 struct inode *inode, unsigned long *num_pages)
316{
317 int ret = 0;
318 unsigned blkbits;
319 unsigned blocks_per_page;
320 pgoff_t contig_pages = 0, total_pages = 0;
321 pgoff_t pagenum = 0, prev_pagenum = 0;
322 sector_t probe_block = 0;
323 sector_t last_block;
324
325 blkbits = inode->i_blkbits;
326 blocks_per_page = PAGE_SIZE >> blkbits;
327
328 last_block = i_size_read(inode) >> blkbits;
329 while (probe_block + blocks_per_page <= last_block) {
330 unsigned block_in_page;
331 sector_t first_block;
332
333 first_block = bmap(inode, probe_block);
334 if (first_block == 0)
335 goto bad_bmap;
336
337 /* It must be PAGE_SIZE aligned on-disk */
338 if (first_block & (blocks_per_page - 1)) {
339 probe_block++;
340 goto probe_next;
341 }
342
343 /* All blocks within this page must be contiguous on disk */
344 for (block_in_page = 1; block_in_page < blocks_per_page;
345 block_in_page++) {
346 sector_t block;
347
348 block = bmap(inode, probe_block + block_in_page);
349 if (block == 0)
350 goto bad_bmap;
351 if (block != first_block + block_in_page) {
352 /* Discontiguity */
353 probe_block++;
354 goto probe_next;
355 }
356 }
357
358 /*
359 * We found a PAGE_SIZE length, PAGE_SIZE aligned
360 * run of blocks.
361 */
362 pagenum = first_block >> (PAGE_SHIFT - blkbits);
363
364 if (total_pages && (pagenum != prev_pagenum + 1)) {
365 ret = add_backing_swap_extent(rzs, prev_pagenum -
366 (contig_pages - 1), contig_pages);
367 if (ret < 0)
368 goto out;
369 rzs->num_extents++;
370 contig_pages = 0;
371 }
372 total_pages++;
373 contig_pages++;
374 prev_pagenum = pagenum;
375 probe_block += blocks_per_page;
376
377probe_next:
378 continue;
379 }
380
381 if (contig_pages) {
382 pr_debug("adding last extent: pagenum=%lu, "
383 "contig_pages=%lu\n", pagenum, contig_pages);
384 ret = add_backing_swap_extent(rzs,
385 prev_pagenum - (contig_pages - 1), contig_pages);
386 if (ret < 0)
387 goto out;
388 rzs->num_extents++;
389 }
390 if (!rzs->num_extents) {
391 pr_err("No swap extents found!\n");
392 ret = -EINVAL;
393 }
394
395 if (!ret) {
396 *num_pages = total_pages;
397 pr_info("Found %lu extents containing %luk\n",
398 rzs->num_extents, *num_pages << (PAGE_SHIFT - 10));
399 }
400 goto out;
401
402bad_bmap:
403 pr_err("Backing swapfile has holes\n");
404 ret = -EINVAL;
405out:
406 while (ret && !list_empty(&rzs->backing_swap_extent_list)) {
407 struct page *page;
408 struct list_head *entry = rzs->backing_swap_extent_list.next;
409 page = list_entry(entry, struct page, lru);
410 list_del(entry);
411 __free_page(page);
412 }
413 return ret;
414}
415
/*
 * Annotate every page of rzs->table with the backing swap extent that
 * covers the first table entry stored on that page:
 *   page->mapping - pointer to that extent
 *   page->private - offset (in pages) of that entry within the extent
 * map_backing_swap_page() reads these two fields back.
 *
 * Does nothing when there are no extents (backing swap is a block
 * device). Hits BUG() if the extents run out before all table pages
 * are covered.
 */
static void map_backing_swap_extents(struct ramzswap *rzs)
{
	struct ramzswap_backing_extent *se;
	struct page *table_page, *se_page;
	unsigned long num_pages, num_table_pages, entry;
	unsigned long se_idx, span;
	unsigned entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
	unsigned extents_per_page = PAGE_SIZE / sizeof(*se);

	/* True for block device */
	if (!rzs->num_extents)
		return;

	/* Start at the first extent of the first extent page. */
	se_page = list_entry(rzs->backing_swap_extent_list.next,
					struct page, lru);
	se = page_address(se_page);
	/* 'span' = number of logical pages covered by extents up to 'se'. */
	span = se->num_pages;
	num_pages = rzs->disksize >> PAGE_SHIFT;
	num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
							PAGE_SIZE);

	entry = 0;
	se_idx = 0;
	while (num_table_pages--) {
		table_page = vmalloc_to_page(&rzs->table[entry]);
		/* Advance until the current extent covers 'entry'. */
		while (span <= entry) {
			se_idx++;
			if (se_idx == rzs->num_extents)
				BUG();

			/* Crossed into the next extent page? */
			if (!(se_idx % extents_per_page)) {
				se_page = list_entry(se_page->lru.next,
						struct page, lru);
				se = page_address(se_page);
			} else
				se++;

			span += se->num_pages;
		}
		table_page->mapping = (struct address_space *)se;
		/* (span - entry) pages of 'se' remain from 'entry' onwards. */
		table_page->private = se->num_pages - (span - entry);
		pr_debug("map_table: entry=%lu, span=%lu, map=%p, priv=%lu\n",
			entry, span, table_page->mapping, table_page->private);
		entry += entries_per_page;
	}
}
462
463/*
464 * Check if value of backing_swap module param is sane.
465 * Claim this device and set ramzswap size equal to
466 * size of this block device.
467 */
468static int setup_backing_swap(struct ramzswap *rzs)
469{
470 int ret = 0;
471 size_t disksize;
472 unsigned long num_pages = 0;
473 struct inode *inode;
474 struct file *swap_file;
475 struct address_space *mapping;
476 struct block_device *bdev = NULL;
477
478 if (!rzs->backing_swap_name[0]) {
479 pr_debug("backing_swap param not given\n");
480 goto out;
481 }
482
483 pr_info("Using backing swap device: %s\n", rzs->backing_swap_name);
484
485 swap_file = filp_open(rzs->backing_swap_name,
486 O_RDWR | O_LARGEFILE, 0);
487 if (IS_ERR(swap_file)) {
488 pr_err("Error opening backing device: %s\n",
489 rzs->backing_swap_name);
490 ret = -EINVAL;
491 goto out;
492 }
493
494 mapping = swap_file->f_mapping;
495 inode = mapping->host;
496
497 if (S_ISBLK(inode->i_mode)) {
498 bdev = I_BDEV(inode);
499 ret = bd_claim(bdev, setup_backing_swap);
500 if (ret < 0) {
501 bdev = NULL;
502 goto bad_param;
503 }
504 disksize = i_size_read(inode);
Nitin Guptac25d75a2010-01-28 21:19:59 +0530505 /*
506 * Can happen if user gives an extended partition as
507 * backing swap or simply a bad disk.
508 */
509 if (!disksize) {
510 pr_err("Error reading backing swap size.\n");
511 goto bad_param;
512 }
Nitin Gupta306b0c92009-09-22 10:26:53 +0530513 } else if (S_ISREG(inode->i_mode)) {
514 bdev = inode->i_sb->s_bdev;
515 if (IS_SWAPFILE(inode)) {
516 ret = -EBUSY;
517 goto bad_param;
518 }
519 ret = setup_backing_swap_extents(rzs, inode, &num_pages);
520 if (ret < 0)
521 goto bad_param;
522 disksize = num_pages << PAGE_SHIFT;
523 } else {
524 goto bad_param;
525 }
526
527 rzs->swap_file = swap_file;
528 rzs->backing_swap = bdev;
529 rzs->disksize = disksize;
Nitin Gupta306b0c92009-09-22 10:26:53 +0530530
531 return 0;
532
533bad_param:
534 if (bdev)
535 bd_release(bdev);
536 filp_close(swap_file, NULL);
537
538out:
539 rzs->backing_swap = NULL;
540 return ret;
541}
542
543/*
544 * Map logical page number 'pagenum' to physical page number
545 * on backing swap device. For block device, this is a nop.
546 */
Nitin Guptac25d75a2010-01-28 21:19:59 +0530547static u32 map_backing_swap_page(struct ramzswap *rzs, u32 pagenum)
Nitin Gupta306b0c92009-09-22 10:26:53 +0530548{
549 u32 skip_pages, entries_per_page;
550 size_t delta, se_offset, skipped;
551 struct page *table_page, *se_page;
552 struct ramzswap_backing_extent *se;
553
554 if (!rzs->num_extents)
555 return pagenum;
556
557 entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
558
559 table_page = vmalloc_to_page(&rzs->table[pagenum]);
560 se = (struct ramzswap_backing_extent *)table_page->mapping;
561 se_page = virt_to_page(se);
562
563 skip_pages = pagenum - (pagenum / entries_per_page * entries_per_page);
564 se_offset = table_page->private + skip_pages;
565
566 if (se_offset < se->num_pages)
567 return se->phy_pagenum + se_offset;
568
569 skipped = se->num_pages - table_page->private;
570 do {
571 struct ramzswap_backing_extent *se_base;
572 u32 se_entries_per_page = PAGE_SIZE / sizeof(*se);
573
574 /* Get next swap extent */
575 se_base = (struct ramzswap_backing_extent *)
576 page_address(se_page);
577 if (se - se_base == se_entries_per_page - 1) {
578 se_page = list_entry(se_page->lru.next,
579 struct page, lru);
580 se = page_address(se_page);
581 } else {
582 se++;
583 }
584
585 skipped += se->num_pages;
586 } while (skipped < skip_pages);
587
588 delta = skipped - skip_pages;
589 se_offset = se->num_pages - delta;
590
591 return se->phy_pagenum + se_offset;
592}
593
594static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
595{
596 u32 clen;
597 void *obj;
598
599 struct page *page = rzs->table[index].page;
600 u32 offset = rzs->table[index].offset;
601
602 if (unlikely(!page)) {
Nitin Gupta2e882282010-01-28 21:13:41 +0530603 /*
604 * No memory is allocated for zero filled pages.
605 * Simply clear zero page flag.
606 */
Nitin Gupta306b0c92009-09-22 10:26:53 +0530607 if (rzs_test_flag(rzs, index, RZS_ZERO)) {
608 rzs_clear_flag(rzs, index, RZS_ZERO);
Nitin Gupta6a907722010-01-28 21:13:37 +0530609 rzs_stat_dec(&rzs->stats.pages_zero);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530610 }
611 return;
612 }
613
614 if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
615 clen = PAGE_SIZE;
616 __free_page(page);
617 rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
Nitin Gupta6a907722010-01-28 21:13:37 +0530618 rzs_stat_dec(&rzs->stats.pages_expand);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530619 goto out;
620 }
621
622 obj = kmap_atomic(page, KM_USER0) + offset;
623 clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
624 kunmap_atomic(obj, KM_USER0);
625
626 xv_free(rzs->mem_pool, page, offset);
627 if (clen <= PAGE_SIZE / 2)
Nitin Gupta6a907722010-01-28 21:13:37 +0530628 rzs_stat_dec(&rzs->stats.good_compress);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530629
630out:
631 rzs->stats.compr_size -= clen;
Nitin Gupta6a907722010-01-28 21:13:37 +0530632 rzs_stat_dec(&rzs->stats.pages_stored);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530633
634 rzs->table[index].page = NULL;
635 rzs->table[index].offset = 0;
636}
637
638static int handle_zero_page(struct bio *bio)
639{
640 void *user_mem;
641 struct page *page = bio->bi_io_vec[0].bv_page;
642
643 user_mem = kmap_atomic(page, KM_USER0);
644 memset(user_mem, 0, PAGE_SIZE);
645 kunmap_atomic(user_mem, KM_USER0);
646
Nitin Gupta30fb8a72009-12-12 11:44:46 +0530647 flush_dcache_page(page);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530648
649 set_bit(BIO_UPTODATE, &bio->bi_flags);
650 bio_endio(bio, 0);
651 return 0;
652}
653
654static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio)
655{
656 u32 index;
657 struct page *page;
658 unsigned char *user_mem, *cmem;
659
660 page = bio->bi_io_vec[0].bv_page;
661 index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
662
663 user_mem = kmap_atomic(page, KM_USER0);
664 cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
665 rzs->table[index].offset;
666
667 memcpy(user_mem, cmem, PAGE_SIZE);
668 kunmap_atomic(user_mem, KM_USER0);
669 kunmap_atomic(cmem, KM_USER1);
670
Nitin Gupta30fb8a72009-12-12 11:44:46 +0530671 flush_dcache_page(page);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530672
673 set_bit(BIO_UPTODATE, &bio->bi_flags);
674 bio_endio(bio, 0);
675 return 0;
676}
677
Nitin Gupta306b0c92009-09-22 10:26:53 +0530678/*
679 * Called when request page is not present in ramzswap.
680 * Its either in backing swap device (if present) or
681 * this is an attempt to read before any previous write
682 * to this location - this happens due to readahead when
683 * swap device is read from user-space (e.g. during swapon)
684 */
685static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio)
686{
687 /*
688 * Always forward such requests to backing swap
689 * device (if present)
690 */
691 if (rzs->backing_swap) {
692 u32 pagenum;
Nitin Gupta6a907722010-01-28 21:13:37 +0530693 rzs_stat64_dec(rzs, &rzs->stats.num_reads);
694 rzs_stat64_inc(rzs, &rzs->stats.bdev_num_reads);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530695 bio->bi_bdev = rzs->backing_swap;
696
697 /*
698 * In case backing swap is a file, find the right offset within
699 * the file corresponding to logical position 'index'. For block
700 * device, this is a nop.
701 */
702 pagenum = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
703 bio->bi_sector = map_backing_swap_page(rzs, pagenum)
704 << SECTORS_PER_PAGE_SHIFT;
705 return 1;
706 }
707
708 /*
709 * Its unlikely event in case backing dev is
710 * not present
711 */
712 pr_debug("Read before write on swap device: "
713 "sector=%lu, size=%u, offset=%u\n",
714 (ulong)(bio->bi_sector), bio->bi_size,
715 bio->bi_io_vec[0].bv_offset);
716
717 /* Do nothing. Just return success */
718 set_bit(BIO_UPTODATE, &bio->bi_flags);
719 bio_endio(bio, 0);
720 return 0;
721}
722
723static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
724{
725 int ret;
726 u32 index;
727 size_t clen;
728 struct page *page;
729 struct zobj_header *zheader;
730 unsigned char *user_mem, *cmem;
731
Nitin Gupta6a907722010-01-28 21:13:37 +0530732 rzs_stat64_inc(rzs, &rzs->stats.num_reads);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530733
734 page = bio->bi_io_vec[0].bv_page;
735 index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
736
737 if (rzs_test_flag(rzs, index, RZS_ZERO))
738 return handle_zero_page(bio);
739
740 /* Requested page is not present in compressed area */
741 if (!rzs->table[index].page)
742 return handle_ramzswap_fault(rzs, bio);
743
C ypef4ffb72010-01-06 13:42:00 +0100744 /* Page is stored uncompressed since it's incompressible */
Nitin Gupta306b0c92009-09-22 10:26:53 +0530745 if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
746 return handle_uncompressed_page(rzs, bio);
747
748 user_mem = kmap_atomic(page, KM_USER0);
749 clen = PAGE_SIZE;
750
751 cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
752 rzs->table[index].offset;
753
754 ret = lzo1x_decompress_safe(
755 cmem + sizeof(*zheader),
756 xv_get_object_size(cmem) - sizeof(*zheader),
757 user_mem, &clen);
758
759 kunmap_atomic(user_mem, KM_USER0);
760 kunmap_atomic(cmem, KM_USER1);
761
762 /* should NEVER happen */
763 if (unlikely(ret != LZO_E_OK)) {
764 pr_err("Decompression failed! err=%d, page=%u\n",
765 ret, index);
Nitin Gupta6a907722010-01-28 21:13:37 +0530766 rzs_stat64_inc(rzs, &rzs->stats.failed_reads);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530767 goto out;
768 }
769
Nitin Gupta30fb8a72009-12-12 11:44:46 +0530770 flush_dcache_page(page);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530771
772 set_bit(BIO_UPTODATE, &bio->bi_flags);
773 bio_endio(bio, 0);
774 return 0;
775
776out:
777 bio_io_error(bio);
778 return 0;
779}
780
/*
 * Handle a write request for one page.
 *
 * Any data previously stored at this index is freed first. Then:
 *  - a zero-filled page is only flagged RZS_ZERO, nothing is stored;
 *  - with backing swap present, the write is forwarded when the
 *    compressed size total exceeds memlimit - PAGE_SIZE, when the page
 *    compresses to more than max_zpage_size, or when the pool
 *    allocation fails;
 *  - without backing swap, an incompressible page is stored as-is in
 *    a freshly allocated page (RZS_UNCOMPRESSED);
 *  - otherwise the compressed data is copied into an xvmalloc object.
 *
 * Returns 0 when the bio was completed here (successfully or with an
 * I/O error), 1 when bio->bi_bdev/bi_sector were redirected to the
 * backing swap device.
 * NOTE(review): the non-zero return presumably asks the block layer to
 * resubmit the bio to the new device -- confirm against the
 * make_request contract.
 */
static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
{
	int ret, fwd_write_request = 0;
	u32 offset, index;
	size_t clen;
	struct zobj_header *zheader;
	struct page *page, *page_store;
	unsigned char *user_mem, *cmem, *src;

	rzs_stat64_inc(rzs, &rzs->stats.num_writes);

	page = bio->bi_io_vec[0].bv_page;
	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;

	/* Compression output lands in the per-device buffer. */
	src = rzs->compress_buffer;

	/*
	 * System swaps to same sector again when the stored page
	 * is no longer referenced by any process. So, its now safe
	 * to free the memory that was allocated for this page.
	 */
	/* NOTE(review): this free happens before rzs->lock is taken. */
	if (rzs->table[index].page || rzs_test_flag(rzs, index, RZS_ZERO))
		ramzswap_free_page(rzs, index);

	mutex_lock(&rzs->lock);

	user_mem = kmap_atomic(page, KM_USER0);
	if (page_zero_filled(user_mem)) {
		kunmap_atomic(user_mem, KM_USER0);
		mutex_unlock(&rzs->lock);
		rzs_stat_inc(&rzs->stats.pages_zero);
		rzs_set_flag(rzs, index, RZS_ZERO);

		set_bit(BIO_UPTODATE, &bio->bi_flags);
		bio_endio(bio, 0);
		return 0;
	}

	/* Memory limit (nearly) reached: send the page to backing swap. */
	if (rzs->backing_swap &&
		(rzs->stats.compr_size > rzs->memlimit - PAGE_SIZE)) {
		kunmap_atomic(user_mem, KM_USER0);
		mutex_unlock(&rzs->lock);
		fwd_write_request = 1;
		goto out;
	}

	ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
				rzs->compress_workmem);

	kunmap_atomic(user_mem, KM_USER0);

	if (unlikely(ret != LZO_E_OK)) {
		mutex_unlock(&rzs->lock);
		pr_err("Compression failed! err=%d\n", ret);
		rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
		goto out;
	}

	/*
	 * Page is incompressible. Forward it to backing swap
	 * if present. Otherwise, store it as-is (uncompressed)
	 * since we do not want to return too many swap write
	 * errors which has side effect of hanging the system.
	 */
	if (unlikely(clen > max_zpage_size)) {
		if (rzs->backing_swap) {
			mutex_unlock(&rzs->lock);
			fwd_write_request = 1;
			goto out;
		}

		clen = PAGE_SIZE;
		page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
		if (unlikely(!page_store)) {
			mutex_unlock(&rzs->lock);
			pr_info("Error allocating memory for incompressible "
				"page: %u\n", index);
			rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
			goto out;
		}

		offset = 0;
		rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
		rzs_stat_inc(&rzs->stats.pages_expand);
		rzs->table[index].page = page_store;
		/* Copy source is the original page, not the compress buffer. */
		src = kmap_atomic(page, KM_USER0);
		goto memstore;
	}

	/* xv_malloc() fills in the table page and 'offset' on success. */
	if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
			&rzs->table[index].page, &offset,
			GFP_NOIO | __GFP_HIGHMEM)) {
		mutex_unlock(&rzs->lock);
		pr_info("Error allocating memory for compressed "
			"page: %u, size=%zu\n", index, clen);
		rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
		if (rzs->backing_swap)
			fwd_write_request = 1;
		goto out;
	}

memstore:
	rzs->table[index].offset = offset;

	cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
			rzs->table[index].offset;

#if 0
	/* Back-reference needed for memory defragmentation */
	if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
		zheader = (struct zobj_header *)cmem;
		zheader->table_idx = index;
		cmem += sizeof(*zheader);
	}
#endif

	memcpy(cmem, src, clen);

	kunmap_atomic(cmem, KM_USER1);
	/* 'src' is a kmap of the bio page only in the uncompressed case. */
	if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
		kunmap_atomic(src, KM_USER0);

	/* Update stats */
	rzs->stats.compr_size += clen;
	rzs_stat_inc(&rzs->stats.pages_stored);
	if (clen <= PAGE_SIZE / 2)
		rzs_stat_inc(&rzs->stats.good_compress);

	mutex_unlock(&rzs->lock);

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return 0;

out:
	/* Reached with rzs->lock already released. */
	if (fwd_write_request) {
		rzs_stat64_inc(rzs, &rzs->stats.bdev_num_writes);
		bio->bi_bdev = rzs->backing_swap;
#if 0
		/*
		 * TODO: We currently have linear mapping of ramzswap and
		 * backing swap sectors. This is not desired since we want
		 * to optimize writes to backing swap to minimize disk seeks
		 * or have effective wear leveling (for SSDs). Also, a
		 * non-linear mapping is required to implement compressed
		 * on-disk swapping.
		 */
		 bio->bi_sector = get_backing_swap_page()
					<< SECTORS_PER_PAGE_SHIFT;
#endif
		/*
		 * In case backing swap is a file, find the right offset within
		 * the file corresponding to logical position 'index'. For block
		 * device, this is a nop.
		 */
		bio->bi_sector = map_backing_swap_page(rzs, index)
					<< SECTORS_PER_PAGE_SHIFT;
		return 1;
	}

	bio_io_error(bio);
	return 0;
}
944
Nitin Gupta306b0c92009-09-22 10:26:53 +0530945/*
946 * Check if request is within bounds and page aligned.
947 */
948static inline int valid_swap_request(struct ramzswap *rzs, struct bio *bio)
949{
950 if (unlikely(
951 (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) ||
952 (bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
953 (bio->bi_vcnt != 1) ||
954 (bio->bi_size != PAGE_SIZE) ||
955 (bio->bi_io_vec[0].bv_offset != 0))) {
956
957 return 0;
958 }
959
960 /* swap request is valid */
961 return 1;
962}
963
964/*
965 * Handler function for all ramzswap I/O requests.
966 */
967static int ramzswap_make_request(struct request_queue *queue, struct bio *bio)
968{
969 int ret = 0;
970 struct ramzswap *rzs = queue->queuedata;
971
972 if (unlikely(!rzs->init_done)) {
973 bio_io_error(bio);
974 return 0;
975 }
976
977 if (!valid_swap_request(rzs, bio)) {
Nitin Gupta6a907722010-01-28 21:13:37 +0530978 rzs_stat64_inc(rzs, &rzs->stats.invalid_io);
Nitin Gupta306b0c92009-09-22 10:26:53 +0530979 bio_io_error(bio);
980 return 0;
981 }
982
983 switch (bio_data_dir(bio)) {
984 case READ:
985 ret = ramzswap_read(rzs, bio);
986 break;
987
988 case WRITE:
989 ret = ramzswap_write(rzs, bio);
990 break;
991 }
992
993 return ret;
994}
995
/*
 * Tear down everything set up for device 'rzs' and return it to the
 * uninitialized state: compression buffers, all stored pages, the
 * table, the memory pool, the backing swap extent list, the backing
 * swap file/device, statistics, disksize and memlimit.
 */
static void reset_device(struct ramzswap *rzs)
{
	int is_backing_blkdev = 0;
	size_t index, num_pages;
	unsigned entries_per_page;
	unsigned long num_table_pages, entry = 0;

	/* Do not accept any new I/O request */
	rzs->init_done = 0;

	/*
	 * Backing swap without extents means a block device. Remember
	 * this now: num_extents is zeroed before the device is closed.
	 */
	if (rzs->backing_swap && !rzs->num_extents)
		is_backing_blkdev = 1;

	num_pages = rzs->disksize >> PAGE_SHIFT;

	/* Free various per-device buffers */
	kfree(rzs->compress_workmem);
	free_pages((unsigned long)rzs->compress_buffer, 1);

	rzs->compress_workmem = NULL;
	rzs->compress_buffer = NULL;

	/* Free all pages that are still in this ramzswap device */
	for (index = 0; index < num_pages; index++) {
		struct page *page;
		u16 offset;

		page = rzs->table[index].page;
		offset = rzs->table[index].offset;

		if (!page)
			continue;

		/* Uncompressed data owns a whole page; the rest is pool memory. */
		if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
			__free_page(page);
		else
			xv_free(rzs->mem_pool, page, offset);
	}

	entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
	num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
					PAGE_SIZE);
	/*
	 * Set page->mapping to NULL for every table page.
	 * Otherwise, we will hit bad_page() during free.
	 */
	/* page->mapping is only set by map_backing_swap_extents(), i.e. with extents. */
	while (rzs->num_extents && num_table_pages--) {
		struct page *page;
		page = vmalloc_to_page(&rzs->table[entry]);
		page->mapping = NULL;
		entry += entries_per_page;
	}
	vfree(rzs->table);
	rzs->table = NULL;

	xv_destroy_pool(rzs->mem_pool);
	rzs->mem_pool = NULL;

	/* Free all swap extent pages */
	while (!list_empty(&rzs->backing_swap_extent_list)) {
		struct page *page;
		struct list_head *entry;
		entry = rzs->backing_swap_extent_list.next;
		page = list_entry(entry, struct page, lru);
		list_del(entry);
		__free_page(page);
	}
	INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
	rzs->num_extents = 0;

	/* Close backing swap device, if present */
	if (rzs->backing_swap) {
		/* Only a block device was claimed in setup_backing_swap(). */
		if (is_backing_blkdev)
			bd_release(rzs->backing_swap);
		filp_close(rzs->swap_file, NULL);
		rzs->backing_swap = NULL;
		memset(rzs->backing_swap_name, 0, MAX_SWAP_NAME_LEN);
	}

	/* Reset stats */
	memset(&rzs->stats, 0, sizeof(rzs->stats));

	rzs->disksize = 0;
	rzs->memlimit = 0;
}
1081
1082static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
1083{
1084 int ret;
1085 size_t num_pages;
1086 struct page *page;
1087 union swap_header *swap_header;
1088
1089 if (rzs->init_done) {
1090 pr_info("Device already initialized!\n");
1091 return -EBUSY;
1092 }
1093
1094 ret = setup_backing_swap(rzs);
1095 if (ret)
1096 goto fail;
1097
1098 if (rzs->backing_swap)
1099 ramzswap_set_memlimit(rzs, totalram_pages << PAGE_SHIFT);
1100 else
1101 ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);
1102
1103 rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
1104 if (!rzs->compress_workmem) {
1105 pr_err("Error allocating compressor working memory!\n");
1106 ret = -ENOMEM;
1107 goto fail;
1108 }
1109
1110 rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1);
1111 if (!rzs->compress_buffer) {
1112 pr_err("Error allocating compressor buffer space\n");
1113 ret = -ENOMEM;
1114 goto fail;
1115 }
1116
1117 num_pages = rzs->disksize >> PAGE_SHIFT;
1118 rzs->table = vmalloc(num_pages * sizeof(*rzs->table));
1119 if (!rzs->table) {
1120 pr_err("Error allocating ramzswap address table\n");
1121 /* To prevent accessing table entries during cleanup */
1122 rzs->disksize = 0;
1123 ret = -ENOMEM;
1124 goto fail;
1125 }
1126 memset(rzs->table, 0, num_pages * sizeof(*rzs->table));
1127
1128 map_backing_swap_extents(rzs);
1129
1130 page = alloc_page(__GFP_ZERO);
1131 if (!page) {
1132 pr_err("Error allocating swap header page\n");
1133 ret = -ENOMEM;
1134 goto fail;
1135 }
1136 rzs->table[0].page = page;
1137 rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED);
1138
1139 swap_header = kmap(page);
1140 ret = setup_swap_header(rzs, swap_header);
1141 kunmap(page);
1142 if (ret) {
1143 pr_err("Error setting swap header\n");
1144 goto fail;
1145 }
1146
1147 set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);
1148
1149 /*
1150 * We have ident mapping of sectors for ramzswap and
1151 * and the backing swap device. So, this queue flag
1152 * should be according to backing dev.
1153 */
1154 if (!rzs->backing_swap ||
1155 blk_queue_nonrot(rzs->backing_swap->bd_disk->queue))
1156 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);
1157
1158 rzs->mem_pool = xv_create_pool();
1159 if (!rzs->mem_pool) {
1160 pr_err("Error creating memory pool\n");
1161 ret = -ENOMEM;
1162 goto fail;
1163 }
1164
1165 /*
1166 * Pages that compress to size greater than this are forwarded
1167 * to physical swap disk (if backing dev is provided)
1168 * TODO: make this configurable
1169 */
1170 if (rzs->backing_swap)
1171 max_zpage_size = max_zpage_size_bdev;
1172 else
1173 max_zpage_size = max_zpage_size_nobdev;
1174 pr_debug("Max compressed page size: %u bytes\n", max_zpage_size);
1175
1176 rzs->init_done = 1;
1177
1178 pr_debug("Initialization done!\n");
1179 return 0;
1180
1181fail:
1182 reset_device(rzs);
1183
1184 pr_err("Initialization failed: err=%d\n", ret);
1185 return ret;
1186}
1187
1188static int ramzswap_ioctl_reset_device(struct ramzswap *rzs)
1189{
1190 if (rzs->init_done)
1191 reset_device(rzs);
1192
1193 return 0;
1194}
1195
1196static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
1197 unsigned int cmd, unsigned long arg)
1198{
1199 int ret = 0;
1200 size_t disksize_kb, memlimit_kb;
1201
1202 struct ramzswap *rzs = bdev->bd_disk->private_data;
1203
1204 switch (cmd) {
1205 case RZSIO_SET_DISKSIZE_KB:
1206 if (rzs->init_done) {
1207 ret = -EBUSY;
1208 goto out;
1209 }
1210 if (copy_from_user(&disksize_kb, (void *)arg,
1211 _IOC_SIZE(cmd))) {
1212 ret = -EFAULT;
1213 goto out;
1214 }
1215 rzs->disksize = disksize_kb << 10;
1216 pr_info("Disk size set to %zu kB\n", disksize_kb);
1217 break;
1218
1219 case RZSIO_SET_MEMLIMIT_KB:
1220 if (rzs->init_done) {
1221 /* TODO: allow changing memlimit */
1222 ret = -EBUSY;
1223 goto out;
1224 }
1225 if (copy_from_user(&memlimit_kb, (void *)arg,
1226 _IOC_SIZE(cmd))) {
1227 ret = -EFAULT;
1228 goto out;
1229 }
1230 rzs->memlimit = memlimit_kb << 10;
1231 pr_info("Memory limit set to %zu kB\n", memlimit_kb);
1232 break;
1233
1234 case RZSIO_SET_BACKING_SWAP:
1235 if (rzs->init_done) {
1236 ret = -EBUSY;
1237 goto out;
1238 }
1239
1240 if (copy_from_user(&rzs->backing_swap_name, (void *)arg,
1241 _IOC_SIZE(cmd))) {
1242 ret = -EFAULT;
1243 goto out;
1244 }
1245 rzs->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
1246 pr_info("Backing swap set to %s\n", rzs->backing_swap_name);
1247 break;
1248
1249 case RZSIO_GET_STATS:
1250 {
1251 struct ramzswap_ioctl_stats *stats;
1252 if (!rzs->init_done) {
1253 ret = -ENOTTY;
1254 goto out;
1255 }
1256 stats = kzalloc(sizeof(*stats), GFP_KERNEL);
1257 if (!stats) {
1258 ret = -ENOMEM;
1259 goto out;
1260 }
1261 ramzswap_ioctl_get_stats(rzs, stats);
1262 if (copy_to_user((void *)arg, stats, sizeof(*stats))) {
1263 kfree(stats);
1264 ret = -EFAULT;
1265 goto out;
1266 }
1267 kfree(stats);
1268 break;
1269 }
1270 case RZSIO_INIT:
1271 ret = ramzswap_ioctl_init_device(rzs);
1272 break;
1273
1274 case RZSIO_RESET:
1275 /* Do not reset an active device! */
1276 if (bdev->bd_holders) {
1277 ret = -EBUSY;
1278 goto out;
1279 }
Nitin Gupta7eef7532010-01-28 21:13:38 +05301280
1281 /* Make sure all pending I/O is finished */
1282 if (bdev)
1283 fsync_bdev(bdev);
1284
Nitin Gupta306b0c92009-09-22 10:26:53 +05301285 ret = ramzswap_ioctl_reset_device(rzs);
1286 break;
1287
1288 default:
1289 pr_info("Invalid ioctl %u\n", cmd);
1290 ret = -ENOTTY;
1291 }
1292
1293out:
1294 return ret;
1295}
1296
1297static struct block_device_operations ramzswap_devops = {
1298 .ioctl = ramzswap_ioctl,
1299 .owner = THIS_MODULE,
1300};
1301
Minchan Kim3bf040c2010-01-11 16:15:53 +09001302static int create_device(struct ramzswap *rzs, int device_id)
Nitin Gupta306b0c92009-09-22 10:26:53 +05301303{
Nitin Guptade1a21a2010-01-28 21:13:40 +05301304 int ret = 0;
1305
Nitin Gupta306b0c92009-09-22 10:26:53 +05301306 mutex_init(&rzs->lock);
Nitin Gupta6a907722010-01-28 21:13:37 +05301307 spin_lock_init(&rzs->stat64_lock);
Nitin Gupta306b0c92009-09-22 10:26:53 +05301308 INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
1309
1310 rzs->queue = blk_alloc_queue(GFP_KERNEL);
1311 if (!rzs->queue) {
1312 pr_err("Error allocating disk queue for device %d\n",
1313 device_id);
Nitin Guptade1a21a2010-01-28 21:13:40 +05301314 ret = -ENOMEM;
1315 goto out;
Nitin Gupta306b0c92009-09-22 10:26:53 +05301316 }
1317
1318 blk_queue_make_request(rzs->queue, ramzswap_make_request);
1319 rzs->queue->queuedata = rzs;
1320
1321 /* gendisk structure */
1322 rzs->disk = alloc_disk(1);
1323 if (!rzs->disk) {
1324 blk_cleanup_queue(rzs->queue);
1325 pr_warning("Error allocating disk structure for device %d\n",
1326 device_id);
Nitin Guptade1a21a2010-01-28 21:13:40 +05301327 ret = -ENOMEM;
1328 goto out;
Nitin Gupta306b0c92009-09-22 10:26:53 +05301329 }
1330
1331 rzs->disk->major = ramzswap_major;
1332 rzs->disk->first_minor = device_id;
1333 rzs->disk->fops = &ramzswap_devops;
1334 rzs->disk->queue = rzs->queue;
1335 rzs->disk->private_data = rzs;
1336 snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);
1337
1338 /*
1339 * Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl
1340 * or set equal to backing swap device (if provided)
1341 */
1342 set_capacity(rzs->disk, 0);
Nitin Gupta5d83d5a2010-01-28 21:13:39 +05301343
1344 blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
1345 blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE);
1346
Nitin Gupta306b0c92009-09-22 10:26:53 +05301347 add_disk(rzs->disk);
1348
1349 rzs->init_done = 0;
Nitin Guptade1a21a2010-01-28 21:13:40 +05301350
1351out:
1352 return ret;
Nitin Gupta306b0c92009-09-22 10:26:53 +05301353}
1354
1355static void destroy_device(struct ramzswap *rzs)
1356{
1357 if (rzs->disk) {
1358 del_gendisk(rzs->disk);
1359 put_disk(rzs->disk);
1360 }
1361
1362 if (rzs->queue)
1363 blk_cleanup_queue(rzs->queue);
1364}
1365
1366static int __init ramzswap_init(void)
1367{
Nitin Guptade1a21a2010-01-28 21:13:40 +05301368 int ret, dev_id;
Nitin Gupta306b0c92009-09-22 10:26:53 +05301369
1370 if (num_devices > max_num_devices) {
1371 pr_warning("Invalid value for num_devices: %u\n",
1372 num_devices);
Nitin Guptade1a21a2010-01-28 21:13:40 +05301373 ret = -EINVAL;
1374 goto out;
Nitin Gupta306b0c92009-09-22 10:26:53 +05301375 }
1376
1377 ramzswap_major = register_blkdev(0, "ramzswap");
1378 if (ramzswap_major <= 0) {
1379 pr_warning("Unable to get major number\n");
Nitin Guptade1a21a2010-01-28 21:13:40 +05301380 ret = -EBUSY;
1381 goto out;
Nitin Gupta306b0c92009-09-22 10:26:53 +05301382 }
1383
1384 if (!num_devices) {
1385 pr_info("num_devices not specified. Using default: 1\n");
1386 num_devices = 1;
1387 }
1388
1389 /* Allocate the device array and initialize each one */
1390 pr_info("Creating %u devices ...\n", num_devices);
1391 devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL);
Nitin Guptade1a21a2010-01-28 21:13:40 +05301392 if (!devices) {
1393 ret = -ENOMEM;
1394 goto unregister;
1395 }
Nitin Gupta306b0c92009-09-22 10:26:53 +05301396
Nitin Guptade1a21a2010-01-28 21:13:40 +05301397 for (dev_id = 0; dev_id < num_devices; dev_id++) {
1398 ret = create_device(&devices[dev_id], dev_id);
1399 if (ret)
Minchan Kim3bf040c2010-01-11 16:15:53 +09001400 goto free_devices;
Nitin Guptade1a21a2010-01-28 21:13:40 +05301401 }
1402
Nitin Gupta306b0c92009-09-22 10:26:53 +05301403 return 0;
Nitin Guptade1a21a2010-01-28 21:13:40 +05301404
Minchan Kim3bf040c2010-01-11 16:15:53 +09001405free_devices:
Nitin Guptade1a21a2010-01-28 21:13:40 +05301406 while (dev_id)
1407 destroy_device(&devices[--dev_id]);
1408unregister:
Nitin Gupta306b0c92009-09-22 10:26:53 +05301409 unregister_blkdev(ramzswap_major, "ramzswap");
Nitin Guptade1a21a2010-01-28 21:13:40 +05301410out:
Nitin Gupta306b0c92009-09-22 10:26:53 +05301411 return ret;
1412}
1413
1414static void __exit ramzswap_exit(void)
1415{
1416 int i;
1417 struct ramzswap *rzs;
1418
1419 for (i = 0; i < num_devices; i++) {
1420 rzs = &devices[i];
1421
1422 destroy_device(rzs);
1423 if (rzs->init_done)
1424 reset_device(rzs);
1425 }
1426
1427 unregister_blkdev(ramzswap_major, "ramzswap");
1428
1429 kfree(devices);
1430 pr_debug("Cleanup done!\n");
1431}
1432
1433module_param(num_devices, uint, 0);
1434MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");
1435
1436module_init(ramzswap_init);
1437module_exit(ramzswap_exit);
1438
1439MODULE_LICENSE("Dual BSD/GPL");
1440MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
1441MODULE_DESCRIPTION("Compressed RAM Based Swap Device");