/*
 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 * Licensed under the GPL
 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
23#include "linux/config.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/devfs_fs_kernel.h"
29#include "linux/cdrom.h"
30#include "linux/proc_fs.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/vmalloc.h"
35#include "linux/blkpg.h"
36#include "linux/genhd.h"
37#include "linux/spinlock.h"
38#include "asm/segment.h"
39#include "asm/uaccess.h"
40#include "asm/irq.h"
41#include "asm/types.h"
42#include "asm/tlbflush.h"
43#include "user_util.h"
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
52#include "2_5compat.h"
53#include "os.h"
54#include "mem.h"
55#include "mem_kern.h"
56#include "cow.h"
57
/* Operation codes carried in struct io_thread_req. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE,
	UBD_MMAP
};
59
60struct io_thread_req {
61 enum ubd_req op;
62 int fds[2];
63 unsigned long offsets[2];
64 unsigned long long offset;
65 unsigned long length;
66 char *buffer;
67 int sectorsize;
68 unsigned long sector_mask;
69 unsigned long long cow_offset;
70 unsigned long bitmap_words[2];
71 int map_fd;
72 unsigned long long map_offset;
73 int error;
74};
75
/* Helpers implemented outside this file. */
extern int open_ubd_file(char *file,
			 struct openflags *openflags,
			 char **backing_file_out,
			 int *bitmap_offset_out,
			 unsigned long *bitmap_len_out,
			 int *data_offset_out,
			 int *create_cow_out);

extern int create_cow_file(char *cow_file,
			   char *backing_file,
			   struct openflags flags,
			   int sectorsize,
			   int alignment,
			   int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);

extern int read_cow_bitmap(int fd, void *buf, int offset, int len);

extern void do_io(struct io_thread_req *req);
87
/*
 * Test bit number 'bit' in the byte array 'data'.  Bit 0 is the least
 * significant bit of data[0], bit 8 the least significant bit of data[1],
 * and so on.  Returns 1 if the bit is set and 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	return (data[index] & (1 << shift)) ? 1 : 0;
}
98
/*
 * Set bit number 'bit' in the byte array 'data', using the same bit
 * numbering as ubd_test_bit(): bit 0 is the least significant bit of
 * data[0].  All other bits are left untouched.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	data[index] = data[index] | (1 << shift);
}
109/*End stuff from ubd_user.h*/
110
/* Name used for the platform driver and the per-unit platform devices. */
#define DRIVER_NAME "uml-blkdev"

/* Passed to blk_init_queue() as the lock of the request queue. */
static DEFINE_SPINLOCK(ubd_io_lock);

/* Taken around configuration changes (ubd_dev[] entries, fake_major). */
static DEFINE_SPINLOCK(ubd_lock);

/* Set to ubd_handler while a request is outstanding in the I/O thread,
 * NULL otherwise.
 */
static void (*do_ubd)(void);

/* Block device operations, defined further down. */
static int ubd_open(struct inode * inode, struct file * filp);
static int ubd_release(struct inode * inode, struct file * file);
static int ubd_ioctl(struct inode * inode, struct file * file,
		     unsigned int cmd, unsigned long arg);

/* Number of ubd units supported. */
#define MAX_DEV (8)

/* Changed in early boot */
static int ubd_do_mmap = 0;

/* Granularity of the mmap fast path. */
#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE
128
129static struct block_device_operations ubd_blops = {
130 .owner = THIS_MODULE,
131 .open = ubd_open,
132 .release = ubd_release,
133 .ioctl = ubd_ioctl,
134};
135
136/* Protected by the queue_lock */
137static request_queue_t *ubd_queue;
138
139/* Protected by ubd_lock */
140static int fake_major = MAJOR_NR;
141
142static struct gendisk *ubd_gendisk[MAX_DEV];
143static struct gendisk *fake_gendisk[MAX_DEV];
144
145#ifdef CONFIG_BLK_DEV_UBD_SYNC
146#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
147 .cl = 1 })
148#else
149#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
150 .cl = 1 })
151#endif
152
153/* Not protected - changed only in ubd_setup_common and then only to
154 * to enable O_SYNC.
155 */
156static struct openflags global_openflags = OPEN_FLAGS;
157
/* COW-related state of one ubd device. */
struct cow {
	/* This is the backing file, actually */
	char *file;
	/* Descriptor of the backing file once the device is open. */
	int fd;
	/* In-memory copy of the COW bitmap (vmalloc'ed in ubd_open_dev). */
	unsigned long *bitmap;
	/* Size of the bitmap in bytes. */
	unsigned long bitmap_len;
	/* Offset of the bitmap inside the COW file. */
	int bitmap_offset;
	/* Offset of the sector data inside the COW file. */
	int data_offset;
};
167
168struct ubd {
169 char *file;
170 int count;
171 int fd;
172 __u64 size;
173 struct openflags boot_openflags;
174 struct openflags openflags;
175 int no_cow;
176 struct cow cow;
177 struct platform_device pdev;
178
179 int map_writes;
180 int map_reads;
181 int nomap_writes;
182 int nomap_reads;
183 int write_maps;
184};
185
/* Initializer for an unconfigured struct cow: no file, no bitmap. */
#define DEFAULT_COW {			\
	.file		= NULL,		\
	.fd		= -1,		\
	.bitmap		= NULL,		\
	.bitmap_len	= 0,		\
	.bitmap_offset	= 0,		\
	.data_offset	= 0,		\
}
193
/* Initializer for an unconfigured struct ubd (also used by ubd_remove). */
#define DEFAULT_UBD {				\
	.file		= NULL,			\
	.count		= 0,			\
	.fd		= -1,			\
	.size		= -1,			\
	.boot_openflags	= OPEN_FLAGS,		\
	.openflags	= OPEN_FLAGS,		\
	.no_cow		= 0,			\
	.cow		= DEFAULT_COW,		\
	.map_writes	= 0,			\
	.map_reads	= 0,			\
	.nomap_writes	= 0,			\
	.nomap_reads	= 0,			\
	.write_maps	= 0,			\
}
209
210struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
211
212static int ubd0_init(void)
213{
214 struct ubd *dev = &ubd_dev[0];
215
216 if(dev->file == NULL)
217 dev->file = "root_fs";
218 return(0);
219}
220
221__initcall(ubd0_init);
222
223/* Only changed by fake_ide_setup which is a setup */
224static int fake_ide = 0;
225static struct proc_dir_entry *proc_ide_root = NULL;
226static struct proc_dir_entry *proc_ide = NULL;
227
228static void make_proc_ide(void)
229{
230 proc_ide_root = proc_mkdir("ide", NULL);
231 proc_ide = proc_mkdir("ide0", proc_ide_root);
232}
233
/*
 * read_proc handler for the fake "media" entry: the content is always
 * the string "disk\n".
 *
 * The text is copied to 'page'.  When fewer than 'count' bytes remain
 * past 'off', *eof is set; if nothing remains at all, 0 is returned and
 * *start is left alone.  Otherwise *start is pointed at page + off and
 * the number of bytes available (at most 'count') is returned.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);

	remaining = strlen(media);
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	} else {
		*eof = 1;
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
250
251static void make_ide_entries(char *dev_name)
252{
253 struct proc_dir_entry *dir, *ent;
254 char name[64];
255
256 if(proc_ide_root == NULL) make_proc_ide();
257
258 dir = proc_mkdir(dev_name, proc_ide);
259 if(!dir) return;
260
261 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
262 if(!ent) return;
263 ent->nlink = 1;
264 ent->data = NULL;
265 ent->read_proc = proc_ide_read_media;
266 ent->write_proc = NULL;
267 sprintf(name,"ide0/%s", dev_name);
268 proc_symlink(dev_name, proc_ide_root, name);
269}
270
271static int fake_ide_setup(char *str)
272{
273 fake_ide = 1;
274 return(1);
275}
276
277__setup("fake_ide", fake_ide_setup);
278
279__uml_help(fake_ide_setup,
280"fake_ide\n"
281" Create ide0 entries that map onto ubd devices.\n\n"
282);
283
/*
 * Parse a unit specifier at *ptr: either a number (as understood by
 * simple_strtoul with base 0) or a single letter 'a'..'h' standing for
 * 0..7.  On success *ptr is advanced past the specifier and the unit is
 * returned; otherwise -1 is returned and *ptr is left unchanged.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'h')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
302
303static int ubd_setup_common(char *str, int *index_out)
304{
305 struct ubd *dev;
306 struct openflags flags = global_openflags;
307 char *backing_file;
308 int n, err, i;
309
310 if(index_out) *index_out = -1;
311 n = *str;
312 if(n == '='){
313 char *end;
314 int major;
315
316 str++;
317 if(!strcmp(str, "mmap")){
318 CHOOSE_MODE(printk("mmap not supported by the ubd "
319 "driver in tt mode\n"),
320 ubd_do_mmap = 1);
321 return(0);
322 }
323
324 if(!strcmp(str, "sync")){
325 global_openflags = of_sync(global_openflags);
326 return(0);
327 }
328 major = simple_strtoul(str, &end, 0);
329 if((*end != '\0') || (end == str)){
330 printk(KERN_ERR
331 "ubd_setup : didn't parse major number\n");
332 return(1);
333 }
334
335 err = 1;
336 spin_lock(&ubd_lock);
337 if(fake_major != MAJOR_NR){
338 printk(KERN_ERR "Can't assign a fake major twice\n");
339 goto out1;
340 }
341
342 fake_major = major;
343
344 printk(KERN_INFO "Setting extra ubd major number to %d\n",
345 major);
346 err = 0;
347 out1:
348 spin_unlock(&ubd_lock);
349 return(err);
350 }
351
352 n = parse_unit(&str);
353 if(n < 0){
354 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
355 "'%s'\n", str);
356 return(1);
357 }
358 if(n >= MAX_DEV){
359 printk(KERN_ERR "ubd_setup : index %d out of range "
360 "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
361 return(1);
362 }
363
364 err = 1;
365 spin_lock(&ubd_lock);
366
367 dev = &ubd_dev[n];
368 if(dev->file != NULL){
369 printk(KERN_ERR "ubd_setup : device already configured\n");
370 goto out;
371 }
372
373 if (index_out)
374 *index_out = n;
375
376 for (i = 0; i < 4; i++) {
377 switch (*str) {
378 case 'r':
379 flags.w = 0;
380 break;
381 case 's':
382 flags.s = 1;
383 break;
384 case 'd':
385 dev->no_cow = 1;
386 break;
387 case '=':
388 str++;
389 goto break_loop;
390 default:
391 printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
392 goto out;
393 }
394 str++;
395 }
396
397 if (*str == '=')
398 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
399 else
400 printk(KERN_ERR "ubd_setup : Expected '='\n");
401 goto out;
402
403break_loop:
404 err = 0;
405 backing_file = strchr(str, ',');
406
407 if (!backing_file) {
408 backing_file = strchr(str, ':');
409 }
410
411 if(backing_file){
412 if(dev->no_cow)
413 printk(KERN_ERR "Can't specify both 'd' and a "
414 "cow file\n");
415 else {
416 *backing_file = '\0';
417 backing_file++;
418 }
419 }
420 dev->file = str;
421 dev->cow.file = backing_file;
422 dev->boot_openflags = flags;
423out:
424 spin_unlock(&ubd_lock);
425 return(err);
426}
427
428static int ubd_setup(char *str)
429{
430 ubd_setup_common(str, NULL);
431 return(1);
432}
433
434__setup("ubd", ubd_setup);
435__uml_help(ubd_setup,
436"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
437" This is used to associate a device with a file in the underlying\n"
438" filesystem. When specifying two filenames, the first one is the\n"
439" COW name and the second is the backing file name. As separator you can\n"
440" use either a ':' or a ',': the first one allows writing things like;\n"
441" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
442" while with a ',' the shell would not expand the 2nd '~'.\n"
443" When using only one filename, UML will detect whether to thread it like\n"
444" a COW file or a backing file. To override this detection, add the 'd'\n"
445" flag:\n"
446" ubd0d=BackingFile\n"
447" Usually, there is a filesystem in the file, but \n"
448" that's not required. Swap devices containing swap files can be\n"
449" specified like this. Also, a file which doesn't contain a\n"
450" filesystem can have its contents read in the virtual \n"
451" machine by running 'dd' on the device. <n> must be in the range\n"
452" 0 to 7. Appending an 'r' to the number will cause that device\n"
453" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
454" an 's' will cause data to be written to disk on the host immediately.\n\n"
455);
456
457static int udb_setup(char *str)
458{
459 printk("udb%s specified on command line is almost certainly a ubd -> "
460 "udb TYPO\n", str);
461 return(1);
462}
463
464__setup("udb", udb_setup);
465__uml_help(udb_setup,
466"udb\n"
467" This option is here solely to catch ubd -> udb typos, which can be\n\n"
468" to impossible to catch visually unless you specifically look for\n\n"
469" them. The only result of any option starting with 'udb' is an error\n\n"
470" in the boot output.\n\n"
471);
472
473static int fakehd_set = 0;
474static int fakehd(char *str)
475{
476 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
477 fakehd_set = 1;
478 return 1;
479}
480
481__setup("fakehd", fakehd);
482__uml_help(fakehd,
483"fakehd\n"
484" Change the ubd device name to \"hd\".\n\n"
485);
486
487static void do_ubd_request(request_queue_t * q);
488
489/* Only changed by ubd_init, which is an initcall. */
490int thread_fd = -1;
491
492/* Changed by ubd_handler, which is serialized because interrupts only
493 * happen on CPU 0.
494 */
495int intr_count = 0;
496
497/* call ubd_finish if you need to serialize */
498static void __ubd_finish(struct request *req, int error)
499{
500 int nsect;
501
502 if(error){
503 end_request(req, 0);
504 return;
505 }
506 nsect = req->current_nr_sectors;
507 req->sector += nsect;
508 req->buffer += nsect << 9;
509 req->errors = 0;
510 req->nr_sectors -= nsect;
511 req->current_nr_sectors = 0;
512 end_request(req, 1);
513}
514
515static inline void ubd_finish(struct request *req, int error)
516{
517 spin_lock(&ubd_io_lock);
518 __ubd_finish(req, error);
519 spin_unlock(&ubd_io_lock);
520}
521
522/* Called without ubd_io_lock held */
523static void ubd_handler(void)
524{
525 struct io_thread_req req;
526 struct request *rq = elv_next_request(ubd_queue);
527 int n, err;
528
529 do_ubd = NULL;
530 intr_count++;
531 n = os_read_file(thread_fd, &req, sizeof(req));
532 if(n != sizeof(req)){
533 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
534 "err = %d\n", os_getpid(), -n);
535 spin_lock(&ubd_io_lock);
536 end_request(rq, 0);
537 spin_unlock(&ubd_io_lock);
538 return;
539 }
540
541 if((req.op != UBD_MMAP) &&
542 ((req.offset != ((__u64) (rq->sector)) << 9) ||
543 (req.length != (rq->current_nr_sectors) << 9)))
544 panic("I/O op mismatch");
545
546 if(req.map_fd != -1){
547 err = physmem_subst_mapping(req.buffer, req.map_fd,
548 req.map_offset, 1);
549 if(err)
550 printk("ubd_handler - physmem_subst_mapping failed, "
551 "err = %d\n", -err);
552 }
553
554 ubd_finish(rq, req.error);
555 reactivate_fd(thread_fd, UBD_IRQ);
556 do_ubd_request(ubd_queue);
557}
558
559static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
560{
561 ubd_handler();
562 return(IRQ_HANDLED);
563}
564
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit call: kill the I/O thread, if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
575
576static int ubd_file_size(struct ubd *dev, __u64 *size_out)
577{
578 char *file;
579
580 file = dev->cow.file ? dev->cow.file : dev->file;
581 return(os_file_size(file, size_out));
582}
583
584static void ubd_close(struct ubd *dev)
585{
586 if(ubd_do_mmap)
587 physmem_forget_descriptor(dev->fd);
588 os_close_file(dev->fd);
589 if(dev->cow.file == NULL)
590 return;
591
592 if(ubd_do_mmap)
593 physmem_forget_descriptor(dev->cow.fd);
594 os_close_file(dev->cow.fd);
595 vfree(dev->cow.bitmap);
596 dev->cow.bitmap = NULL;
597}
598
599static int ubd_open_dev(struct ubd *dev)
600{
601 struct openflags flags;
602 char **back_ptr;
603 int err, create_cow, *create_ptr;
604
605 dev->openflags = dev->boot_openflags;
606 create_cow = 0;
607 create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
608 back_ptr = dev->no_cow ? NULL : &dev->cow.file;
609 dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
610 &dev->cow.bitmap_offset, &dev->cow.bitmap_len,
611 &dev->cow.data_offset, create_ptr);
612
613 if((dev->fd == -ENOENT) && create_cow){
614 dev->fd = create_cow_file(dev->file, dev->cow.file,
615 dev->openflags, 1 << 9, PAGE_SIZE,
616 &dev->cow.bitmap_offset,
617 &dev->cow.bitmap_len,
618 &dev->cow.data_offset);
619 if(dev->fd >= 0){
620 printk(KERN_INFO "Creating \"%s\" as COW file for "
621 "\"%s\"\n", dev->file, dev->cow.file);
622 }
623 }
624
625 if(dev->fd < 0){
626 printk("Failed to open '%s', errno = %d\n", dev->file,
627 -dev->fd);
628 return(dev->fd);
629 }
630
631 if(dev->cow.file != NULL){
632 err = -ENOMEM;
633 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
634 if(dev->cow.bitmap == NULL){
635 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
636 goto error;
637 }
638 flush_tlb_kernel_vm();
639
640 err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
641 dev->cow.bitmap_offset,
642 dev->cow.bitmap_len);
643 if(err < 0)
644 goto error;
645
646 flags = dev->openflags;
647 flags.w = 0;
648 err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL,
649 NULL, NULL);
650 if(err < 0) goto error;
651 dev->cow.fd = err;
652 }
653 return(0);
654 error:
655 os_close_file(dev->fd);
656 return(err);
657}
658
659static int ubd_new_disk(int major, u64 size, int unit,
660 struct gendisk **disk_out)
661
662{
663 struct gendisk *disk;
664 char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
665 int err;
666
667 disk = alloc_disk(1 << UBD_SHIFT);
668 if(disk == NULL)
669 return(-ENOMEM);
670
671 disk->major = major;
672 disk->first_minor = unit << UBD_SHIFT;
673 disk->fops = &ubd_blops;
674 set_capacity(disk, size / 512);
675 if(major == MAJOR_NR){
676 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
677 sprintf(disk->devfs_name, "ubd/disc%d", unit);
678 sprintf(from, "ubd/%d", unit);
679 sprintf(to, "disc%d/disc", unit);
680 err = devfs_mk_symlink(from, to);
681 if(err)
682 printk("ubd_new_disk failed to make link from %s to "
683 "%s, error = %d\n", from, to, err);
684 }
685 else {
686 sprintf(disk->disk_name, "ubd_fake%d", unit);
687 sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
688 }
689
690 /* sysfs register (not for ide fake devices) */
691 if (major == MAJOR_NR) {
692 ubd_dev[unit].pdev.id = unit;
693 ubd_dev[unit].pdev.name = DRIVER_NAME;
694 platform_device_register(&ubd_dev[unit].pdev);
695 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
696 }
697
698 disk->private_data = &ubd_dev[unit];
699 disk->queue = ubd_queue;
700 add_disk(disk);
701
702 *disk_out = disk;
703 return 0;
704}
705
/*
 * Round n up to the next multiple of 512 (one sector).
 *
 * The argument is parenthesized so that expressions such as "a | b" are
 * rounded as a whole, and the mask is built with ~ rather than by
 * left-shifting -1, which is undefined behaviour in C.  The mask has the
 * same type and value as before (int, -512).
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
707
708static int ubd_add(int n)
709{
710 struct ubd *dev = &ubd_dev[n];
711 int err;
712
713 if(dev->file == NULL)
714 return(-ENODEV);
715
716 if (ubd_open_dev(dev))
717 return(-ENODEV);
718
719 err = ubd_file_size(dev, &dev->size);
720 if(err < 0)
721 return(err);
722
723 dev->size = ROUND_BLOCK(dev->size);
724
725 err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
726 if(err)
727 return(err);
728
729 if(fake_major != MAJOR_NR)
730 ubd_new_disk(fake_major, dev->size, n,
731 &fake_gendisk[n]);
732
733 /* perhaps this should also be under the "if (fake_major)" above */
734 /* using the fake_disk->disk_name and also the fakehd_set name */
735 if (fake_ide)
736 make_ide_entries(ubd_gendisk[n]->disk_name);
737
738 ubd_close(dev);
739 return 0;
740}
741
742static int ubd_config(char *str)
743{
744 int n, err;
745
746 str = uml_strdup(str);
747 if(str == NULL){
748 printk(KERN_ERR "ubd_config failed to strdup string\n");
749 return(1);
750 }
751 err = ubd_setup_common(str, &n);
752 if(err){
753 kfree(str);
754 return(-1);
755 }
756 if(n == -1) return(0);
757
758 spin_lock(&ubd_lock);
759 err = ubd_add(n);
760 if(err)
761 ubd_dev[n].file = NULL;
762 spin_unlock(&ubd_lock);
763
764 return(err);
765}
766
767static int ubd_get_config(char *name, char *str, int size, char **error_out)
768{
769 struct ubd *dev;
770 int n, len = 0;
771
772 n = parse_unit(&name);
773 if((n >= MAX_DEV) || (n < 0)){
774 *error_out = "ubd_get_config : device number out of range";
775 return(-1);
776 }
777
778 dev = &ubd_dev[n];
779 spin_lock(&ubd_lock);
780
781 if(dev->file == NULL){
782 CONFIG_CHUNK(str, size, len, "", 1);
783 goto out;
784 }
785
786 CONFIG_CHUNK(str, size, len, dev->file, 0);
787
788 if(dev->cow.file != NULL){
789 CONFIG_CHUNK(str, size, len, ",", 0);
790 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
791 }
792 else CONFIG_CHUNK(str, size, len, "", 1);
793
794 out:
795 spin_unlock(&ubd_lock);
796 return(len);
797}
798
799static int ubd_remove(char *str)
800{
801 struct ubd *dev;
802 int n, err = -ENODEV;
803
804 n = parse_unit(&str);
805
806 if((n < 0) || (n >= MAX_DEV))
807 return(err);
808
809 dev = &ubd_dev[n];
810 if(dev->count > 0)
811 return(-EBUSY); /* you cannot remove a open disk */
812
813 err = 0;
814 spin_lock(&ubd_lock);
815
816 if(ubd_gendisk[n] == NULL)
817 goto out;
818
819 del_gendisk(ubd_gendisk[n]);
820 put_disk(ubd_gendisk[n]);
821 ubd_gendisk[n] = NULL;
822
823 if(fake_gendisk[n] != NULL){
824 del_gendisk(fake_gendisk[n]);
825 put_disk(fake_gendisk[n]);
826 fake_gendisk[n] = NULL;
827 }
828
829 platform_device_unregister(&dev->pdev);
830 *dev = ((struct ubd) DEFAULT_UBD);
831 err = 0;
832 out:
833 spin_unlock(&ubd_lock);
834 return(err);
835}
836
837static struct mc_device ubd_mc = {
838 .name = "ubd",
839 .config = ubd_config,
840 .get_config = ubd_get_config,
841 .remove = ubd_remove,
842};
843
844static int ubd_mc_init(void)
845{
846 mconsole_register_dev(&ubd_mc);
847 return 0;
848}
849
850__initcall(ubd_mc_init);
851
852static struct device_driver ubd_driver = {
853 .name = DRIVER_NAME,
854 .bus = &platform_bus_type,
855};
856
857int ubd_init(void)
858{
859 int i;
860
861 devfs_mk_dir("ubd");
862 if (register_blkdev(MAJOR_NR, "ubd"))
863 return -1;
864
865 ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
866 if (!ubd_queue) {
867 unregister_blkdev(MAJOR_NR, "ubd");
868 return -1;
869 }
870
871 if (fake_major != MAJOR_NR) {
872 char name[sizeof("ubd_nnn\0")];
873
874 snprintf(name, sizeof(name), "ubd_%d", fake_major);
875 devfs_mk_dir(name);
876 if (register_blkdev(fake_major, "ubd"))
877 return -1;
878 }
879 driver_register(&ubd_driver);
880 for (i = 0; i < MAX_DEV; i++)
881 ubd_add(i);
882 return 0;
883}
884
885late_initcall(ubd_init);
886
887int ubd_driver_init(void){
888 unsigned long stack;
889 int err;
890
891 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
892 if(global_openflags.s){
893 printk(KERN_INFO "ubd: Synchronous mode\n");
894 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
895 * enough. So use anyway the io thread. */
896 }
897 stack = alloc_stack(0, 0);
898 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
899 &thread_fd);
900 if(io_pid < 0){
901 printk(KERN_ERR
902 "ubd : Failed to start I/O thread (errno = %d) - "
903 "falling back to synchronous I/O\n", -io_pid);
904 io_pid = -1;
905 return(0);
906 }
907 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
908 SA_INTERRUPT, "ubd", ubd_dev);
909 if(err != 0)
910 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
911 return(err);
912}
913
914device_initcall(ubd_driver_init);
915
916static int ubd_open(struct inode *inode, struct file *filp)
917{
918 struct gendisk *disk = inode->i_bdev->bd_disk;
919 struct ubd *dev = disk->private_data;
920 int err = 0;
921
922 if(dev->count == 0){
923 err = ubd_open_dev(dev);
924 if(err){
925 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
926 disk->disk_name, dev->file, -err);
927 goto out;
928 }
929 }
930 dev->count++;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700931 set_disk_ro(disk, !dev->openflags.w);
932
933 /* This should no more be needed. And it didn't work anyway to exclude
934 * read-write remounting of filesystems.*/
935 /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 if(--dev->count == 0) ubd_close(dev);
937 err = -EROFS;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700938 }*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 out:
940 return(err);
941}
942
943static int ubd_release(struct inode * inode, struct file * file)
944{
945 struct gendisk *disk = inode->i_bdev->bd_disk;
946 struct ubd *dev = disk->private_data;
947
948 if(--dev->count == 0)
949 ubd_close(dev);
950 return(0);
951}
952
953static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
954 __u64 *cow_offset, unsigned long *bitmap,
955 __u64 bitmap_offset, unsigned long *bitmap_words,
956 __u64 bitmap_len)
957{
958 __u64 sector = io_offset >> 9;
959 int i, update_bitmap = 0;
960
961 for(i = 0; i < length >> 9; i++){
962 if(cow_mask != NULL)
963 ubd_set_bit(i, (unsigned char *) cow_mask);
964 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
965 continue;
966
967 update_bitmap = 1;
968 ubd_set_bit(sector + i, (unsigned char *) bitmap);
969 }
970
971 if(!update_bitmap)
972 return;
973
974 *cow_offset = sector / (sizeof(unsigned long) * 8);
975
976 /* This takes care of the case where we're exactly at the end of the
977 * device, and *cow_offset + 1 is off the end. So, just back it up
978 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
979 * for the original diagnosis.
980 */
981 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
982 sizeof(unsigned long) - 1))
983 (*cow_offset)--;
984
985 bitmap_words[0] = bitmap[*cow_offset];
986 bitmap_words[1] = bitmap[*cow_offset + 1];
987
988 *cow_offset *= sizeof(unsigned long);
989 *cow_offset += bitmap_offset;
990}
991
992static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
993 __u64 bitmap_offset, __u64 bitmap_len)
994{
995 __u64 sector = req->offset >> 9;
996 int i;
997
998 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
999 panic("Operation too long");
1000
1001 if(req->op == UBD_READ) {
1002 for(i = 0; i < req->length >> 9; i++){
1003 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1004 ubd_set_bit(i, (unsigned char *)
1005 &req->sector_mask);
1006 }
1007 }
1008 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1009 &req->cow_offset, bitmap, bitmap_offset,
1010 req->bitmap_words, bitmap_len);
1011}
1012
1013static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset)
1014{
1015 __u64 sector;
1016 unsigned char *bitmap;
1017 int bit, i;
1018
1019 /* mmap must have been requested on the command line */
1020 if(!ubd_do_mmap)
1021 return(-1);
1022
1023 /* The buffer must be page aligned */
1024 if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0)
1025 return(-1);
1026
1027 /* The request must be a page long */
1028 if((req->current_nr_sectors << 9) != PAGE_SIZE)
1029 return(-1);
1030
1031 if(dev->cow.file == NULL)
1032 return(dev->fd);
1033
1034 sector = offset >> 9;
1035 bitmap = (unsigned char *) dev->cow.bitmap;
1036 bit = ubd_test_bit(sector, bitmap);
1037
1038 for(i = 1; i < req->current_nr_sectors; i++){
1039 if(ubd_test_bit(sector + i, bitmap) != bit)
1040 return(-1);
1041 }
1042
1043 if(bit || (rq_data_dir(req) == WRITE))
1044 offset += dev->cow.data_offset;
1045
1046 /* The data on disk must be page aligned */
1047 if((offset % UBD_MMAP_BLOCK_SIZE) != 0)
1048 return(-1);
1049
1050 return(bit ? dev->fd : dev->cow.fd);
1051}
1052
1053static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset,
1054 struct request *req,
1055 struct io_thread_req *io_req)
1056{
1057 int err;
1058
1059 if(rq_data_dir(req) == WRITE){
1060 /* Writes are almost no-ops since the new data is already in the
1061 * host page cache
1062 */
1063 dev->map_writes++;
1064 if(dev->cow.file != NULL)
1065 cowify_bitmap(io_req->offset, io_req->length,
1066 &io_req->sector_mask, &io_req->cow_offset,
1067 dev->cow.bitmap, dev->cow.bitmap_offset,
1068 io_req->bitmap_words,
1069 dev->cow.bitmap_len);
1070 }
1071 else {
1072 int w;
1073
1074 if((dev->cow.file != NULL) && (fd == dev->cow.fd))
1075 w = 0;
1076 else w = dev->openflags.w;
1077
1078 if((dev->cow.file != NULL) && (fd == dev->fd))
1079 offset += dev->cow.data_offset;
1080
1081 err = physmem_subst_mapping(req->buffer, fd, offset, w);
1082 if(err){
1083 printk("physmem_subst_mapping failed, err = %d\n",
1084 -err);
1085 return(1);
1086 }
1087 dev->map_reads++;
1088 }
1089 io_req->op = UBD_MMAP;
1090 io_req->buffer = req->buffer;
1091 return(0);
1092}
1093
1094/* Called with ubd_io_lock held */
1095static int prepare_request(struct request *req, struct io_thread_req *io_req)
1096{
1097 struct gendisk *disk = req->rq_disk;
1098 struct ubd *dev = disk->private_data;
1099 __u64 offset;
1100 int len, fd;
1101
1102 if(req->rq_status == RQ_INACTIVE) return(1);
1103
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -07001104 /* This should be impossible now */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
1106 printk("Write attempted on readonly ubd device %s\n",
1107 disk->disk_name);
1108 end_request(req, 0);
1109 return(1);
1110 }
1111
1112 offset = ((__u64) req->sector) << 9;
1113 len = req->current_nr_sectors << 9;
1114
1115 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
1116 io_req->fds[1] = dev->fd;
1117 io_req->map_fd = -1;
1118 io_req->cow_offset = -1;
1119 io_req->offset = offset;
1120 io_req->length = len;
1121 io_req->error = 0;
1122 io_req->sector_mask = 0;
1123
1124 fd = mmap_fd(req, dev, io_req->offset);
1125 if(fd > 0){
1126 /* If mmapping is otherwise OK, but the first access to the
1127 * page is a write, then it's not mapped in yet. So we have
1128 * to write the data to disk first, then we can map the disk
1129 * page in and continue normally from there.
1130 */
1131 if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){
1132 io_req->map_fd = dev->fd;
1133 io_req->map_offset = io_req->offset +
1134 dev->cow.data_offset;
1135 dev->write_maps++;
1136 }
1137 else return(prepare_mmap_request(dev, fd, io_req->offset, req,
1138 io_req));
1139 }
1140
1141 if(rq_data_dir(req) == READ)
1142 dev->nomap_reads++;
1143 else dev->nomap_writes++;
1144
1145 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1146 io_req->offsets[0] = 0;
1147 io_req->offsets[1] = dev->cow.data_offset;
1148 io_req->buffer = req->buffer;
1149 io_req->sectorsize = 1 << 9;
1150
1151 if(dev->cow.file != NULL)
1152 cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
1153 dev->cow.bitmap_len);
1154
1155 return(0);
1156}
1157
1158/* Called with ubd_io_lock held */
1159static void do_ubd_request(request_queue_t *q)
1160{
1161 struct io_thread_req io_req;
1162 struct request *req;
1163 int err, n;
1164
1165 if(thread_fd == -1){
1166 while((req = elv_next_request(q)) != NULL){
1167 err = prepare_request(req, &io_req);
1168 if(!err){
1169 do_io(&io_req);
1170 __ubd_finish(req, io_req.error);
1171 }
1172 }
1173 }
1174 else {
1175 if(do_ubd || (req = elv_next_request(q)) == NULL)
1176 return;
1177 err = prepare_request(req, &io_req);
1178 if(!err){
1179 do_ubd = ubd_handler;
1180 n = os_write_file(thread_fd, (char *) &io_req,
1181 sizeof(io_req));
1182 if(n != sizeof(io_req))
1183 printk("write to io thread failed, "
1184 "errno = %d\n", -n);
1185 }
1186 }
1187}
1188
1189static int ubd_ioctl(struct inode * inode, struct file * file,
1190 unsigned int cmd, unsigned long arg)
1191{
1192 struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
1193 struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1194 struct hd_driveid ubd_id = {
1195 .cyls = 0,
1196 .heads = 128,
1197 .sectors = 32,
1198 };
1199
1200 switch (cmd) {
1201 struct hd_geometry g;
1202 struct cdrom_volctrl volume;
1203 case HDIO_GETGEO:
1204 if(!loc) return(-EINVAL);
1205 g.heads = 128;
1206 g.sectors = 32;
1207 g.cylinders = dev->size / (128 * 32 * 512);
1208 g.start = get_start_sect(inode->i_bdev);
1209 return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
1210
1211 case HDIO_GET_IDENTITY:
1212 ubd_id.cyls = dev->size / (128 * 32 * 512);
1213 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1214 sizeof(ubd_id)))
1215 return(-EFAULT);
1216 return(0);
1217
1218 case CDROMVOLREAD:
1219 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1220 return(-EFAULT);
1221 volume.channel0 = 255;
1222 volume.channel1 = 255;
1223 volume.channel2 = 255;
1224 volume.channel3 = 255;
1225 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1226 return(-EFAULT);
1227 return(0);
1228 }
1229 return(-EINVAL);
1230}
1231
1232static int ubd_check_remapped(int fd, unsigned long address, int is_write,
1233 __u64 offset)
1234{
1235 __u64 bitmap_offset;
1236 unsigned long new_bitmap[2];
1237 int i, err, n;
1238
1239 /* If it's not a write access, we can't do anything about it */
1240 if(!is_write)
1241 return(0);
1242
1243 /* We have a write */
1244 for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){
1245 struct ubd *dev = &ubd_dev[i];
1246
1247 if((dev->fd != fd) && (dev->cow.fd != fd))
1248 continue;
1249
1250 /* It's a write to a ubd device */
1251
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -07001252 /* This should be impossible now */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 if(!dev->openflags.w){
1254 /* It's a write access on a read-only device - probably
1255 * shouldn't happen. If the kernel is trying to change
1256 * something with no intention of writing it back out,
1257 * then this message will clue us in that this needs
1258 * fixing
1259 */
1260 printk("Write access to mapped page from readonly ubd "
1261 "device %d\n", i);
1262 return(0);
1263 }
1264
1265 /* It's a write to a writeable ubd device - it must be COWed
1266 * because, otherwise, the page would have been mapped in
1267 * writeable
1268 */
1269
1270 if(!dev->cow.file)
1271 panic("Write fault on writeable non-COW ubd device %d",
1272 i);
1273
1274 /* It should also be an access to the backing file since the
1275 * COW pages should be mapped in read-write
1276 */
1277
1278 if(fd == dev->fd)
1279 panic("Write fault on a backing page of ubd "
1280 "device %d\n", i);
1281
1282 /* So, we do the write, copying the backing data to the COW
1283 * file...
1284 */
1285
1286 err = os_seek_file(dev->fd, offset + dev->cow.data_offset);
1287 if(err < 0)
1288 panic("Couldn't seek to %lld in COW file of ubd "
1289 "device %d, err = %d",
1290 offset + dev->cow.data_offset, i, -err);
1291
1292 n = os_write_file(dev->fd, (void *) address, PAGE_SIZE);
1293 if(n != PAGE_SIZE)
1294 panic("Couldn't copy data to COW file of ubd "
1295 "device %d, err = %d", i, -n);
1296
1297 /* ... updating the COW bitmap... */
1298
1299 cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset,
1300 dev->cow.bitmap, dev->cow.bitmap_offset,
1301 new_bitmap, dev->cow.bitmap_len);
1302
1303 err = os_seek_file(dev->fd, bitmap_offset);
1304 if(err < 0)
1305 panic("Couldn't seek to %lld in COW file of ubd "
1306 "device %d, err = %d", bitmap_offset, i, -err);
1307
1308 n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap));
1309 if(n != sizeof(new_bitmap))
1310 panic("Couldn't update bitmap of ubd device %d, "
1311 "err = %d", i, -n);
1312
1313 /* Maybe we can map the COW page in, and maybe we can't. If
1314 * it is a pre-V3 COW file, we can't, since the alignment will
1315 * be wrong. If it is a V3 or later COW file which has been
1316 * moved to a system with a larger page size, then maybe we
1317 * can't, depending on the exact location of the page.
1318 */
1319
1320 offset += dev->cow.data_offset;
1321
1322 /* Remove the remapping, putting the original anonymous page
1323 * back. If the COW file can be mapped in, that is done.
1324 * Otherwise, the COW page is read in.
1325 */
1326
1327 if(!physmem_remove_mapping((void *) address))
1328 panic("Address 0x%lx not remapped by ubd device %d",
1329 address, i);
1330 if((offset % UBD_MMAP_BLOCK_SIZE) == 0)
1331 physmem_subst_mapping((void *) address, dev->fd,
1332 offset, 1);
1333 else {
1334 err = os_seek_file(dev->fd, offset);
1335 if(err < 0)
1336 panic("Couldn't seek to %lld in COW file of "
1337 "ubd device %d, err = %d", offset, i,
1338 -err);
1339
1340 n = os_read_file(dev->fd, (void *) address, PAGE_SIZE);
1341 if(n != PAGE_SIZE)
1342 panic("Failed to read page from offset %llx of "
1343 "COW file of ubd device %d, err = %d",
1344 offset, i, -n);
1345 }
1346
1347 return(1);
1348 }
1349
1350 /* It's not a write on a ubd device */
1351 return(0);
1352}
1353
1354static struct remapper ubd_remapper = {
1355 .list = LIST_HEAD_INIT(ubd_remapper.list),
1356 .proc = ubd_check_remapped,
1357};
1358
1359static int ubd_remapper_setup(void)
1360{
1361 if(ubd_do_mmap)
1362 register_remapper(&ubd_remapper);
1363
1364 return(0);
1365}
1366
1367__initcall(ubd_remapper_setup);
1368
1369static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
1370{
1371 struct uml_stat buf1, buf2;
1372 int err;
1373
1374 if(from_cmdline == NULL) return(1);
1375 if(!strcmp(from_cmdline, from_cow)) return(1);
1376
1377 err = os_stat_file(from_cmdline, &buf1);
1378 if(err < 0){
1379 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1380 return(1);
1381 }
1382 err = os_stat_file(from_cow, &buf2);
1383 if(err < 0){
1384 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1385 return(1);
1386 }
1387 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1388 return(1);
1389
1390 printk("Backing file mismatch - \"%s\" requested,\n"
1391 "\"%s\" specified in COW header of \"%s\"\n",
1392 from_cmdline, from_cow, cow);
1393 return(0);
1394}
1395
1396static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1397{
1398 unsigned long modtime;
1399 long long actual;
1400 int err;
1401
1402 err = os_file_modtime(file, &modtime);
1403 if(err < 0){
1404 printk("Failed to get modification time of backing file "
1405 "\"%s\", err = %d\n", file, -err);
1406 return(err);
1407 }
1408
1409 err = os_file_size(file, &actual);
1410 if(err < 0){
1411 printk("Failed to get size of backing file \"%s\", "
1412 "err = %d\n", file, -err);
1413 return(err);
1414 }
1415
1416 if(actual != size){
1417 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1418 * the typecast.*/
1419 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1420 "file\n", (unsigned long long) size, actual);
1421 return(-EINVAL);
1422 }
1423 if(modtime != mtime){
1424 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1425 "file\n", mtime, modtime);
1426 return(-EINVAL);
1427 }
1428 return(0);
1429}
1430
/*
 * Read a COW bitmap from a file into a caller-supplied buffer.
 *
 * fd     - descriptor of the file holding the bitmap
 * buf    - destination buffer, at least len bytes
 * offset - file offset at which the bitmap starts
 * len    - number of bytes to read
 *
 * Returns 0 once all len bytes have been read, the negative error from
 * os_seek_file() or os_read_file() on failure, and -EIO if the file ends
 * before len bytes are available.  A short read used to be reported as
 * success, leaving the tail of buf unfilled.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	char *pos = buf;
	int remaining = len;
	int n;

	n = os_seek_file(fd, offset);
	if (n < 0)
		return n;

	while (remaining > 0) {
		n = os_read_file(fd, pos, remaining);
		if (n < 0)
			return n;
		/* End of file before the whole bitmap was read */
		if (n == 0)
			return -EIO;
		pos += n;
		remaining -= n;
	}

	return 0;
}
1445
1446int open_ubd_file(char *file, struct openflags *openflags,
1447 char **backing_file_out, int *bitmap_offset_out,
1448 unsigned long *bitmap_len_out, int *data_offset_out,
1449 int *create_cow_out)
1450{
1451 time_t mtime;
1452 unsigned long long size;
1453 __u32 version, align;
1454 char *backing_file;
1455 int fd, err, sectorsize, same, mode = 0644;
1456
1457 fd = os_open_file(file, *openflags, mode);
1458 if(fd < 0){
1459 if((fd == -ENOENT) && (create_cow_out != NULL))
1460 *create_cow_out = 1;
1461 if(!openflags->w ||
1462 ((fd != -EROFS) && (fd != -EACCES))) return(fd);
1463 openflags->w = 0;
1464 fd = os_open_file(file, *openflags, mode);
1465 if(fd < 0)
1466 return(fd);
1467 }
1468
1469 err = os_lock_file(fd, openflags->w);
1470 if(err < 0){
1471 printk("Failed to lock '%s', err = %d\n", file, -err);
1472 goto out_close;
1473 }
1474
1475 if(backing_file_out == NULL) return(fd);
1476
1477 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1478 &size, &sectorsize, &align, bitmap_offset_out);
1479 if(err && (*backing_file_out != NULL)){
1480 printk("Failed to read COW header from COW file \"%s\", "
1481 "errno = %d\n", file, -err);
1482 goto out_close;
1483 }
1484 if(err) return(fd);
1485
1486 if(backing_file_out == NULL) return(fd);
1487
1488 same = same_backing_files(*backing_file_out, backing_file, file);
1489
1490 if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
1491 printk("Switching backing file to '%s'\n", *backing_file_out);
1492 err = write_cow_header(file, fd, *backing_file_out,
1493 sectorsize, align, &size);
1494 if(err){
1495 printk("Switch failed, errno = %d\n", -err);
1496 return(err);
1497 }
1498 }
1499 else {
1500 *backing_file_out = backing_file;
1501 err = backing_file_mismatch(*backing_file_out, size, mtime);
1502 if(err) goto out_close;
1503 }
1504
1505 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1506 bitmap_len_out, data_offset_out);
1507
1508 return(fd);
1509 out_close:
1510 os_close_file(fd);
1511 return(err);
1512}
1513
1514int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1515 int sectorsize, int alignment, int *bitmap_offset_out,
1516 unsigned long *bitmap_len_out, int *data_offset_out)
1517{
1518 int err, fd;
1519
1520 flags.c = 1;
1521 fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
1522 if(fd < 0){
1523 err = fd;
1524 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1525 -err);
1526 goto out;
1527 }
1528
1529 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1530 bitmap_offset_out, bitmap_len_out,
1531 data_offset_out);
1532 if(!err)
1533 return(fd);
1534 os_close_file(fd);
1535 out:
1536 return(err);
1537}
1538
1539static int update_bitmap(struct io_thread_req *req)
1540{
1541 int n;
1542
1543 if(req->cow_offset == -1)
1544 return(0);
1545
1546 n = os_seek_file(req->fds[1], req->cow_offset);
1547 if(n < 0){
1548 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1549 return(1);
1550 }
1551
1552 n = os_write_file(req->fds[1], &req->bitmap_words,
1553 sizeof(req->bitmap_words));
1554 if(n != sizeof(req->bitmap_words)){
1555 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1556 req->fds[1]);
1557 return(1);
1558 }
1559
1560 return(0);
1561}
1562
1563void do_io(struct io_thread_req *req)
1564{
1565 char *buf;
1566 unsigned long len;
1567 int n, nsectors, start, end, bit;
1568 int err;
1569 __u64 off;
1570
1571 if(req->op == UBD_MMAP){
1572 /* Touch the page to force the host to do any necessary IO to
1573 * get it into memory
1574 */
1575 n = *((volatile int *) req->buffer);
1576 req->error = update_bitmap(req);
1577 return;
1578 }
1579
1580 nsectors = req->length / req->sectorsize;
1581 start = 0;
1582 do {
1583 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1584 end = start;
1585 while((end < nsectors) &&
1586 (ubd_test_bit(end, (unsigned char *)
1587 &req->sector_mask) == bit))
1588 end++;
1589
1590 off = req->offset + req->offsets[bit] +
1591 start * req->sectorsize;
1592 len = (end - start) * req->sectorsize;
1593 buf = &req->buffer[start * req->sectorsize];
1594
1595 err = os_seek_file(req->fds[bit], off);
1596 if(err < 0){
1597 printk("do_io - lseek failed : err = %d\n", -err);
1598 req->error = 1;
1599 return;
1600 }
1601 if(req->op == UBD_READ){
1602 n = 0;
1603 do {
1604 buf = &buf[n];
1605 len -= n;
1606 n = os_read_file(req->fds[bit], buf, len);
1607 if (n < 0) {
1608 printk("do_io - read failed, err = %d "
1609 "fd = %d\n", -n, req->fds[bit]);
1610 req->error = 1;
1611 return;
1612 }
1613 } while((n < len) && (n != 0));
1614 if (n < len) memset(&buf[n], 0, len - n);
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -07001615 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 n = os_write_file(req->fds[bit], buf, len);
1617 if(n != len){
1618 printk("do_io - write failed err = %d "
1619 "fd = %d\n", -n, req->fds[bit]);
1620 req->error = 1;
1621 return;
1622 }
1623 }
1624
1625 start = end;
1626 } while(start < nsectors);
1627
1628 req->error = update_bitmap(req);
1629}
1630
/* Descriptor the io thread reads requests from and writes results to.
 * Only start_io_thread changes it; that is serialized because it is called
 * only from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/* Number of requests the io thread has handled; only the io thread
 * changes it.
 */
int io_count = 0;
1638
1639int io_thread(void *arg)
1640{
1641 struct io_thread_req req;
1642 int n;
1643
1644 ignore_sigwinch_sig();
1645 while(1){
1646 n = os_read_file(kernel_fd, &req, sizeof(req));
1647 if(n != sizeof(req)){
1648 if(n < 0)
1649 printk("io_thread - read failed, fd = %d, "
1650 "err = %d\n", kernel_fd, -n);
1651 else {
1652 printk("io_thread - short read, fd = %d, "
1653 "length = %d\n", kernel_fd, n);
1654 }
1655 continue;
1656 }
1657 io_count++;
1658 do_io(&req);
1659 n = os_write_file(kernel_fd, &req, sizeof(req));
1660 if(n != sizeof(req))
1661 printk("io_thread - write failed, fd = %d, err = %d\n",
1662 kernel_fd, -n);
1663 }
1664}
1665
1666/*
1667 * Overrides for Emacs so that we follow Linus's tabbing style.
1668 * Emacs will notice this stuff at the end of the file and automatically
1669 * adjust the settings for this buffer only. This must remain at the end
1670 * of the file.
1671 * ---------------------------------------------------------------------------
1672 * Local variables:
1673 * c-file-style: "linux"
1674 * End:
1675 */