blob: 2a7f6892c55c203d66c03640acdccb8a3655b8e3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
23#include "linux/config.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/devfs_fs_kernel.h"
29#include "linux/cdrom.h"
30#include "linux/proc_fs.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/vmalloc.h"
35#include "linux/blkpg.h"
36#include "linux/genhd.h"
37#include "linux/spinlock.h"
38#include "asm/segment.h"
39#include "asm/uaccess.h"
40#include "asm/irq.h"
41#include "asm/types.h"
42#include "asm/tlbflush.h"
43#include "user_util.h"
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include "os.h"
53#include "mem.h"
54#include "mem_kern.h"
55#include "cow.h"
56
Jeff Dike7b9014c2005-05-20 13:59:11 -070057enum ubd_req { UBD_READ, UBD_WRITE };
Linus Torvalds1da177e2005-04-16 15:20:36 -070058
59struct io_thread_req {
60 enum ubd_req op;
61 int fds[2];
62 unsigned long offsets[2];
63 unsigned long long offset;
64 unsigned long length;
65 char *buffer;
66 int sectorsize;
67 unsigned long sector_mask;
68 unsigned long long cow_offset;
69 unsigned long bitmap_words[2];
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 int error;
71};
72
73extern int open_ubd_file(char *file, struct openflags *openflags,
74 char **backing_file_out, int *bitmap_offset_out,
75 unsigned long *bitmap_len_out, int *data_offset_out,
76 int *create_cow_out);
77extern int create_cow_file(char *cow_file, char *backing_file,
78 struct openflags flags, int sectorsize,
79 int alignment, int *bitmap_offset_out,
80 unsigned long *bitmap_len_out,
81 int *data_offset_out);
82extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
83extern void do_io(struct io_thread_req *req);
84
/*
 * Report whether bit number `bit` is set in the byte array `data`.
 * Bit 0 is the least significant bit of data[0]; bit 8 is the least
 * significant bit of data[1], and so on.
 * Returns 1 when the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] >> shift) & 1;
}
95
/*
 * Set bit number `bit` in the byte array `data`, using the same numbering
 * as ubd_test_bit (bit 0 is the least significant bit of data[0]).
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;

	data[bit / width] |= 1 << (bit % width);
}
106/*End stuff from ubd_user.h*/
107
108#define DRIVER_NAME "uml-blkdev"
109
110static DEFINE_SPINLOCK(ubd_io_lock);
111static DEFINE_SPINLOCK(ubd_lock);
112
113static void (*do_ubd)(void);
114
115static int ubd_open(struct inode * inode, struct file * filp);
116static int ubd_release(struct inode * inode, struct file * file);
117static int ubd_ioctl(struct inode * inode, struct file * file,
118 unsigned int cmd, unsigned long arg);
119
120#define MAX_DEV (8)
121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122static struct block_device_operations ubd_blops = {
123 .owner = THIS_MODULE,
124 .open = ubd_open,
125 .release = ubd_release,
126 .ioctl = ubd_ioctl,
127};
128
129/* Protected by the queue_lock */
130static request_queue_t *ubd_queue;
131
132/* Protected by ubd_lock */
133static int fake_major = MAJOR_NR;
134
135static struct gendisk *ubd_gendisk[MAX_DEV];
136static struct gendisk *fake_gendisk[MAX_DEV];
137
138#ifdef CONFIG_BLK_DEV_UBD_SYNC
139#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
140 .cl = 1 })
141#else
142#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
143 .cl = 1 })
144#endif
145
/* Not protected - changed only in ubd_setup_common and then only
 * to enable O_SYNC.
 */
149static struct openflags global_openflags = OPEN_FLAGS;
150
/* Copy-on-write state attached to one ubd device. */
struct cow {
	char *file;			/* the backing file, actually; NULL when there is none */
	int fd;				/* descriptor opened read-only on the backing file */
	unsigned long *bitmap;		/* vmalloc'ed copy of the COW bitmap, NULL while closed */
	unsigned long bitmap_len;	/* size used to allocate and read the bitmap */
	int bitmap_offset;		/* offset of the bitmap within the COW file */
	int data_offset;		/* copied into io_thread_req.offsets[1] */
};
160
161struct ubd {
162 char *file;
163 int count;
164 int fd;
165 __u64 size;
166 struct openflags boot_openflags;
167 struct openflags openflags;
168 int no_cow;
169 struct cow cow;
170 struct platform_device pdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171};
172
173#define DEFAULT_COW { \
174 .file = NULL, \
175 .fd = -1, \
176 .bitmap = NULL, \
177 .bitmap_offset = 0, \
178 .data_offset = 0, \
179}
180
181#define DEFAULT_UBD { \
182 .file = NULL, \
183 .count = 0, \
184 .fd = -1, \
185 .size = -1, \
186 .boot_openflags = OPEN_FLAGS, \
187 .openflags = OPEN_FLAGS, \
188 .no_cow = 0, \
189 .cow = DEFAULT_COW, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190}
191
192struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
193
194static int ubd0_init(void)
195{
196 struct ubd *dev = &ubd_dev[0];
197
198 if(dev->file == NULL)
199 dev->file = "root_fs";
200 return(0);
201}
202
203__initcall(ubd0_init);
204
205/* Only changed by fake_ide_setup which is a setup */
206static int fake_ide = 0;
207static struct proc_dir_entry *proc_ide_root = NULL;
208static struct proc_dir_entry *proc_ide = NULL;
209
210static void make_proc_ide(void)
211{
212 proc_ide_root = proc_mkdir("ide", NULL);
213 proc_ide = proc_mkdir("ide0", proc_ide_root);
214}
215
/*
 * read_proc handler for the fake ide "media" entry.  The entry's content
 * is always the text "disk\n".
 *
 * The text is copied to `page`; the return value is the number of bytes
 * available from offset `off`, capped at `count`.  *eof is set once the
 * remaining text is shorter than `count`.  *start is pointed at page + off
 * except when nothing is left to return.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);
	remaining = (int) strlen(media) - off;

	if (remaining >= count) {
		/* The caller's buffer is the limiting factor. */
		*start = page + off;
		return count;
	}

	*eof = 1;
	if (remaining <= 0)
		return 0;

	*start = page + off;
	return remaining;
}
232
233static void make_ide_entries(char *dev_name)
234{
235 struct proc_dir_entry *dir, *ent;
236 char name[64];
237
238 if(proc_ide_root == NULL) make_proc_ide();
239
240 dir = proc_mkdir(dev_name, proc_ide);
241 if(!dir) return;
242
243 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
244 if(!ent) return;
245 ent->nlink = 1;
246 ent->data = NULL;
247 ent->read_proc = proc_ide_read_media;
248 ent->write_proc = NULL;
249 sprintf(name,"ide0/%s", dev_name);
250 proc_symlink(dev_name, proc_ide_root, name);
251}
252
253static int fake_ide_setup(char *str)
254{
255 fake_ide = 1;
256 return(1);
257}
258
259__setup("fake_ide", fake_ide_setup);
260
261__uml_help(fake_ide_setup,
262"fake_ide\n"
263" Create ide0 entries that map onto ubd devices.\n\n"
264);
265
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single letter 'a'..'h' meaning 0..7.
 *
 * On success, returns the unit number and advances *ptr past the
 * designator.  On failure, returns -1 and leaves *ptr untouched.
 *
 * A number that does not fit in a non-negative int is rejected instead of
 * being silently truncated, so an over-long number can no longer alias a
 * valid low unit.
 */
static int parse_unit(char **ptr)
{
	char *str = *ptr, *end;
	unsigned long val;
	int n;

	if(isdigit(*str)) {
		val = simple_strtoul(str, &end, 0);
		n = (int) val;
		/* Round-trip check catches values wider than int. */
		if((end == str) || (n < 0) || ((unsigned long) n != val))
			return(-1);
		*ptr = end;
		return(n);
	}

	if (('a' <= *str) && (*str <= 'h')) {
		*ptr = str + 1;
		return(*str - 'a');
	}

	return(-1);
}
284
285static int ubd_setup_common(char *str, int *index_out)
286{
287 struct ubd *dev;
288 struct openflags flags = global_openflags;
289 char *backing_file;
290 int n, err, i;
291
292 if(index_out) *index_out = -1;
293 n = *str;
294 if(n == '='){
295 char *end;
296 int major;
297
298 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 if(!strcmp(str, "sync")){
300 global_openflags = of_sync(global_openflags);
301 return(0);
302 }
303 major = simple_strtoul(str, &end, 0);
304 if((*end != '\0') || (end == str)){
305 printk(KERN_ERR
306 "ubd_setup : didn't parse major number\n");
307 return(1);
308 }
309
310 err = 1;
311 spin_lock(&ubd_lock);
312 if(fake_major != MAJOR_NR){
313 printk(KERN_ERR "Can't assign a fake major twice\n");
314 goto out1;
315 }
316
317 fake_major = major;
318
319 printk(KERN_INFO "Setting extra ubd major number to %d\n",
320 major);
321 err = 0;
322 out1:
323 spin_unlock(&ubd_lock);
324 return(err);
325 }
326
327 n = parse_unit(&str);
328 if(n < 0){
329 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
330 "'%s'\n", str);
331 return(1);
332 }
333 if(n >= MAX_DEV){
334 printk(KERN_ERR "ubd_setup : index %d out of range "
335 "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
336 return(1);
337 }
338
339 err = 1;
340 spin_lock(&ubd_lock);
341
342 dev = &ubd_dev[n];
343 if(dev->file != NULL){
344 printk(KERN_ERR "ubd_setup : device already configured\n");
345 goto out;
346 }
347
348 if (index_out)
349 *index_out = n;
350
351 for (i = 0; i < 4; i++) {
352 switch (*str) {
353 case 'r':
354 flags.w = 0;
355 break;
356 case 's':
357 flags.s = 1;
358 break;
359 case 'd':
360 dev->no_cow = 1;
361 break;
362 case '=':
363 str++;
364 goto break_loop;
365 default:
366 printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
367 goto out;
368 }
369 str++;
370 }
371
372 if (*str == '=')
373 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
374 else
375 printk(KERN_ERR "ubd_setup : Expected '='\n");
376 goto out;
377
378break_loop:
379 err = 0;
380 backing_file = strchr(str, ',');
381
382 if (!backing_file) {
383 backing_file = strchr(str, ':');
384 }
385
386 if(backing_file){
387 if(dev->no_cow)
388 printk(KERN_ERR "Can't specify both 'd' and a "
389 "cow file\n");
390 else {
391 *backing_file = '\0';
392 backing_file++;
393 }
394 }
395 dev->file = str;
396 dev->cow.file = backing_file;
397 dev->boot_openflags = flags;
398out:
399 spin_unlock(&ubd_lock);
400 return(err);
401}
402
403static int ubd_setup(char *str)
404{
405 ubd_setup_common(str, NULL);
406 return(1);
407}
408
409__setup("ubd", ubd_setup);
410__uml_help(ubd_setup,
411"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
412" This is used to associate a device with a file in the underlying\n"
413" filesystem. When specifying two filenames, the first one is the\n"
414" COW name and the second is the backing file name. As separator you can\n"
415" use either a ':' or a ',': the first one allows writing things like;\n"
416" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
417" while with a ',' the shell would not expand the 2nd '~'.\n"
418" When using only one filename, UML will detect whether to thread it like\n"
419" a COW file or a backing file. To override this detection, add the 'd'\n"
420" flag:\n"
421" ubd0d=BackingFile\n"
422" Usually, there is a filesystem in the file, but \n"
423" that's not required. Swap devices containing swap files can be\n"
424" specified like this. Also, a file which doesn't contain a\n"
425" filesystem can have its contents read in the virtual \n"
426" machine by running 'dd' on the device. <n> must be in the range\n"
427" 0 to 7. Appending an 'r' to the number will cause that device\n"
428" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
429" an 's' will cause data to be written to disk on the host immediately.\n\n"
430);
431
/*
 * Catch the common "udb" misspelling of "ubd" on the command line and
 * say so in the boot output.  Always returns 1.
 */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n",
	       str);

	return 1;
}
438
439__setup("udb", udb_setup);
440__uml_help(udb_setup,
441"udb\n"
Jeff Dike0894e272005-05-28 15:51:55 -0700442" This option is here solely to catch ubd -> udb typos, which can be\n"
443" to impossible to catch visually unless you specifically look for\n"
444" them. The only result of any option starting with 'udb' is an error\n"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445" in the boot output.\n\n"
446);
447
448static int fakehd_set = 0;
449static int fakehd(char *str)
450{
451 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
452 fakehd_set = 1;
453 return 1;
454}
455
456__setup("fakehd", fakehd);
457__uml_help(fakehd,
458"fakehd\n"
459" Change the ubd device name to \"hd\".\n\n"
460);
461
462static void do_ubd_request(request_queue_t * q);
463
464/* Only changed by ubd_init, which is an initcall. */
465int thread_fd = -1;
466
467/* Changed by ubd_handler, which is serialized because interrupts only
468 * happen on CPU 0.
469 */
470int intr_count = 0;
471
472/* call ubd_finish if you need to serialize */
473static void __ubd_finish(struct request *req, int error)
474{
475 int nsect;
476
477 if(error){
478 end_request(req, 0);
479 return;
480 }
481 nsect = req->current_nr_sectors;
482 req->sector += nsect;
483 req->buffer += nsect << 9;
484 req->errors = 0;
485 req->nr_sectors -= nsect;
486 req->current_nr_sectors = 0;
487 end_request(req, 1);
488}
489
490static inline void ubd_finish(struct request *req, int error)
491{
492 spin_lock(&ubd_io_lock);
493 __ubd_finish(req, error);
494 spin_unlock(&ubd_io_lock);
495}
496
497/* Called without ubd_io_lock held */
498static void ubd_handler(void)
499{
500 struct io_thread_req req;
501 struct request *rq = elv_next_request(ubd_queue);
Jeff Dike7b9014c2005-05-20 13:59:11 -0700502 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503
504 do_ubd = NULL;
505 intr_count++;
506 n = os_read_file(thread_fd, &req, sizeof(req));
507 if(n != sizeof(req)){
508 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
509 "err = %d\n", os_getpid(), -n);
510 spin_lock(&ubd_io_lock);
511 end_request(rq, 0);
512 spin_unlock(&ubd_io_lock);
513 return;
514 }
515
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 ubd_finish(rq, req.error);
517 reactivate_fd(thread_fd, UBD_IRQ);
518 do_ubd_request(ubd_queue);
519}
520
521static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
522{
523 ubd_handler();
524 return(IRQ_HANDLED);
525}
526
527/* Only changed by ubd_init, which is an initcall. */
528static int io_pid = -1;
529
530void kill_io_thread(void)
531{
532 if(io_pid != -1)
533 os_kill_process(io_pid, 1);
534}
535
536__uml_exitcall(kill_io_thread);
537
538static int ubd_file_size(struct ubd *dev, __u64 *size_out)
539{
540 char *file;
541
542 file = dev->cow.file ? dev->cow.file : dev->file;
543 return(os_file_size(file, size_out));
544}
545
546static void ubd_close(struct ubd *dev)
547{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 os_close_file(dev->fd);
549 if(dev->cow.file == NULL)
550 return;
551
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 os_close_file(dev->cow.fd);
553 vfree(dev->cow.bitmap);
554 dev->cow.bitmap = NULL;
555}
556
557static int ubd_open_dev(struct ubd *dev)
558{
559 struct openflags flags;
560 char **back_ptr;
561 int err, create_cow, *create_ptr;
562
563 dev->openflags = dev->boot_openflags;
564 create_cow = 0;
565 create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
566 back_ptr = dev->no_cow ? NULL : &dev->cow.file;
567 dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
568 &dev->cow.bitmap_offset, &dev->cow.bitmap_len,
569 &dev->cow.data_offset, create_ptr);
570
571 if((dev->fd == -ENOENT) && create_cow){
572 dev->fd = create_cow_file(dev->file, dev->cow.file,
573 dev->openflags, 1 << 9, PAGE_SIZE,
574 &dev->cow.bitmap_offset,
575 &dev->cow.bitmap_len,
576 &dev->cow.data_offset);
577 if(dev->fd >= 0){
578 printk(KERN_INFO "Creating \"%s\" as COW file for "
579 "\"%s\"\n", dev->file, dev->cow.file);
580 }
581 }
582
583 if(dev->fd < 0){
584 printk("Failed to open '%s', errno = %d\n", dev->file,
585 -dev->fd);
586 return(dev->fd);
587 }
588
589 if(dev->cow.file != NULL){
590 err = -ENOMEM;
591 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
592 if(dev->cow.bitmap == NULL){
593 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
594 goto error;
595 }
596 flush_tlb_kernel_vm();
597
598 err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
599 dev->cow.bitmap_offset,
600 dev->cow.bitmap_len);
601 if(err < 0)
602 goto error;
603
604 flags = dev->openflags;
605 flags.w = 0;
606 err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL,
607 NULL, NULL);
608 if(err < 0) goto error;
609 dev->cow.fd = err;
610 }
611 return(0);
612 error:
613 os_close_file(dev->fd);
614 return(err);
615}
616
617static int ubd_new_disk(int major, u64 size, int unit,
618 struct gendisk **disk_out)
619
620{
621 struct gendisk *disk;
622 char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
623 int err;
624
625 disk = alloc_disk(1 << UBD_SHIFT);
626 if(disk == NULL)
627 return(-ENOMEM);
628
629 disk->major = major;
630 disk->first_minor = unit << UBD_SHIFT;
631 disk->fops = &ubd_blops;
632 set_capacity(disk, size / 512);
633 if(major == MAJOR_NR){
634 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
635 sprintf(disk->devfs_name, "ubd/disc%d", unit);
636 sprintf(from, "ubd/%d", unit);
637 sprintf(to, "disc%d/disc", unit);
638 err = devfs_mk_symlink(from, to);
639 if(err)
640 printk("ubd_new_disk failed to make link from %s to "
641 "%s, error = %d\n", from, to, err);
642 }
643 else {
644 sprintf(disk->disk_name, "ubd_fake%d", unit);
645 sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
646 }
647
648 /* sysfs register (not for ide fake devices) */
649 if (major == MAJOR_NR) {
650 ubd_dev[unit].pdev.id = unit;
651 ubd_dev[unit].pdev.name = DRIVER_NAME;
652 platform_device_register(&ubd_dev[unit].pdev);
653 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
654 }
655
656 disk->private_data = &ubd_dev[unit];
657 disk->queue = ubd_queue;
658 add_disk(disk);
659
660 *disk_out = disk;
661 return 0;
662}
663
/*
 * Round n up to the next multiple of the 512-byte block size.
 * The argument is parenthesized so that expressions can be passed safely,
 * and the mask is built without left-shifting a negative value.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
665
666static int ubd_add(int n)
667{
668 struct ubd *dev = &ubd_dev[n];
669 int err;
670
671 if(dev->file == NULL)
672 return(-ENODEV);
673
674 if (ubd_open_dev(dev))
675 return(-ENODEV);
676
677 err = ubd_file_size(dev, &dev->size);
678 if(err < 0)
679 return(err);
680
681 dev->size = ROUND_BLOCK(dev->size);
682
683 err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
684 if(err)
685 return(err);
686
687 if(fake_major != MAJOR_NR)
688 ubd_new_disk(fake_major, dev->size, n,
689 &fake_gendisk[n]);
690
691 /* perhaps this should also be under the "if (fake_major)" above */
692 /* using the fake_disk->disk_name and also the fakehd_set name */
693 if (fake_ide)
694 make_ide_entries(ubd_gendisk[n]->disk_name);
695
696 ubd_close(dev);
697 return 0;
698}
699
700static int ubd_config(char *str)
701{
702 int n, err;
703
704 str = uml_strdup(str);
705 if(str == NULL){
706 printk(KERN_ERR "ubd_config failed to strdup string\n");
707 return(1);
708 }
709 err = ubd_setup_common(str, &n);
710 if(err){
711 kfree(str);
712 return(-1);
713 }
714 if(n == -1) return(0);
715
716 spin_lock(&ubd_lock);
717 err = ubd_add(n);
718 if(err)
719 ubd_dev[n].file = NULL;
720 spin_unlock(&ubd_lock);
721
722 return(err);
723}
724
725static int ubd_get_config(char *name, char *str, int size, char **error_out)
726{
727 struct ubd *dev;
728 int n, len = 0;
729
730 n = parse_unit(&name);
731 if((n >= MAX_DEV) || (n < 0)){
732 *error_out = "ubd_get_config : device number out of range";
733 return(-1);
734 }
735
736 dev = &ubd_dev[n];
737 spin_lock(&ubd_lock);
738
739 if(dev->file == NULL){
740 CONFIG_CHUNK(str, size, len, "", 1);
741 goto out;
742 }
743
744 CONFIG_CHUNK(str, size, len, dev->file, 0);
745
746 if(dev->cow.file != NULL){
747 CONFIG_CHUNK(str, size, len, ",", 0);
748 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
749 }
750 else CONFIG_CHUNK(str, size, len, "", 1);
751
752 out:
753 spin_unlock(&ubd_lock);
754 return(len);
755}
756
757static int ubd_remove(char *str)
758{
759 struct ubd *dev;
760 int n, err = -ENODEV;
761
762 n = parse_unit(&str);
763
764 if((n < 0) || (n >= MAX_DEV))
765 return(err);
766
767 dev = &ubd_dev[n];
768 if(dev->count > 0)
769 return(-EBUSY); /* you cannot remove a open disk */
770
771 err = 0;
772 spin_lock(&ubd_lock);
773
774 if(ubd_gendisk[n] == NULL)
775 goto out;
776
777 del_gendisk(ubd_gendisk[n]);
778 put_disk(ubd_gendisk[n]);
779 ubd_gendisk[n] = NULL;
780
781 if(fake_gendisk[n] != NULL){
782 del_gendisk(fake_gendisk[n]);
783 put_disk(fake_gendisk[n]);
784 fake_gendisk[n] = NULL;
785 }
786
787 platform_device_unregister(&dev->pdev);
788 *dev = ((struct ubd) DEFAULT_UBD);
789 err = 0;
790 out:
791 spin_unlock(&ubd_lock);
792 return(err);
793}
794
795static struct mc_device ubd_mc = {
796 .name = "ubd",
797 .config = ubd_config,
798 .get_config = ubd_get_config,
799 .remove = ubd_remove,
800};
801
802static int ubd_mc_init(void)
803{
804 mconsole_register_dev(&ubd_mc);
805 return 0;
806}
807
808__initcall(ubd_mc_init);
809
810static struct device_driver ubd_driver = {
811 .name = DRIVER_NAME,
812 .bus = &platform_bus_type,
813};
814
815int ubd_init(void)
816{
817 int i;
818
819 devfs_mk_dir("ubd");
820 if (register_blkdev(MAJOR_NR, "ubd"))
821 return -1;
822
823 ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
824 if (!ubd_queue) {
825 unregister_blkdev(MAJOR_NR, "ubd");
826 return -1;
827 }
828
829 if (fake_major != MAJOR_NR) {
830 char name[sizeof("ubd_nnn\0")];
831
832 snprintf(name, sizeof(name), "ubd_%d", fake_major);
833 devfs_mk_dir(name);
834 if (register_blkdev(fake_major, "ubd"))
835 return -1;
836 }
837 driver_register(&ubd_driver);
838 for (i = 0; i < MAX_DEV; i++)
839 ubd_add(i);
840 return 0;
841}
842
843late_initcall(ubd_init);
844
845int ubd_driver_init(void){
846 unsigned long stack;
847 int err;
848
849 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
850 if(global_openflags.s){
851 printk(KERN_INFO "ubd: Synchronous mode\n");
852 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
853 * enough. So use anyway the io thread. */
854 }
855 stack = alloc_stack(0, 0);
856 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
857 &thread_fd);
858 if(io_pid < 0){
859 printk(KERN_ERR
860 "ubd : Failed to start I/O thread (errno = %d) - "
861 "falling back to synchronous I/O\n", -io_pid);
862 io_pid = -1;
863 return(0);
864 }
865 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
866 SA_INTERRUPT, "ubd", ubd_dev);
867 if(err != 0)
868 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
869 return(err);
870}
871
872device_initcall(ubd_driver_init);
873
874static int ubd_open(struct inode *inode, struct file *filp)
875{
876 struct gendisk *disk = inode->i_bdev->bd_disk;
877 struct ubd *dev = disk->private_data;
878 int err = 0;
879
880 if(dev->count == 0){
881 err = ubd_open_dev(dev);
882 if(err){
883 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
884 disk->disk_name, dev->file, -err);
885 goto out;
886 }
887 }
888 dev->count++;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700889 set_disk_ro(disk, !dev->openflags.w);
890
891 /* This should no more be needed. And it didn't work anyway to exclude
892 * read-write remounting of filesystems.*/
893 /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894 if(--dev->count == 0) ubd_close(dev);
895 err = -EROFS;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700896 }*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 out:
898 return(err);
899}
900
901static int ubd_release(struct inode * inode, struct file * file)
902{
903 struct gendisk *disk = inode->i_bdev->bd_disk;
904 struct ubd *dev = disk->private_data;
905
906 if(--dev->count == 0)
907 ubd_close(dev);
908 return(0);
909}
910
911static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
912 __u64 *cow_offset, unsigned long *bitmap,
913 __u64 bitmap_offset, unsigned long *bitmap_words,
914 __u64 bitmap_len)
915{
916 __u64 sector = io_offset >> 9;
917 int i, update_bitmap = 0;
918
919 for(i = 0; i < length >> 9; i++){
920 if(cow_mask != NULL)
921 ubd_set_bit(i, (unsigned char *) cow_mask);
922 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
923 continue;
924
925 update_bitmap = 1;
926 ubd_set_bit(sector + i, (unsigned char *) bitmap);
927 }
928
929 if(!update_bitmap)
930 return;
931
932 *cow_offset = sector / (sizeof(unsigned long) * 8);
933
934 /* This takes care of the case where we're exactly at the end of the
935 * device, and *cow_offset + 1 is off the end. So, just back it up
936 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
937 * for the original diagnosis.
938 */
939 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
940 sizeof(unsigned long) - 1))
941 (*cow_offset)--;
942
943 bitmap_words[0] = bitmap[*cow_offset];
944 bitmap_words[1] = bitmap[*cow_offset + 1];
945
946 *cow_offset *= sizeof(unsigned long);
947 *cow_offset += bitmap_offset;
948}
949
950static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
951 __u64 bitmap_offset, __u64 bitmap_len)
952{
953 __u64 sector = req->offset >> 9;
954 int i;
955
956 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
957 panic("Operation too long");
958
959 if(req->op == UBD_READ) {
960 for(i = 0; i < req->length >> 9; i++){
961 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
962 ubd_set_bit(i, (unsigned char *)
963 &req->sector_mask);
964 }
965 }
966 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
967 &req->cow_offset, bitmap, bitmap_offset,
968 req->bitmap_words, bitmap_len);
969}
970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971/* Called with ubd_io_lock held */
972static int prepare_request(struct request *req, struct io_thread_req *io_req)
973{
974 struct gendisk *disk = req->rq_disk;
975 struct ubd *dev = disk->private_data;
976 __u64 offset;
Jeff Dike7b9014c2005-05-20 13:59:11 -0700977 int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978
979 if(req->rq_status == RQ_INACTIVE) return(1);
980
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700981 /* This should be impossible now */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
983 printk("Write attempted on readonly ubd device %s\n",
984 disk->disk_name);
985 end_request(req, 0);
986 return(1);
987 }
988
989 offset = ((__u64) req->sector) << 9;
990 len = req->current_nr_sectors << 9;
991
992 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
993 io_req->fds[1] = dev->fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 io_req->cow_offset = -1;
995 io_req->offset = offset;
996 io_req->length = len;
997 io_req->error = 0;
998 io_req->sector_mask = 0;
999
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1001 io_req->offsets[0] = 0;
1002 io_req->offsets[1] = dev->cow.data_offset;
1003 io_req->buffer = req->buffer;
1004 io_req->sectorsize = 1 << 9;
1005
1006 if(dev->cow.file != NULL)
1007 cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
1008 dev->cow.bitmap_len);
1009
1010 return(0);
1011}
1012
1013/* Called with ubd_io_lock held */
1014static void do_ubd_request(request_queue_t *q)
1015{
1016 struct io_thread_req io_req;
1017 struct request *req;
1018 int err, n;
1019
1020 if(thread_fd == -1){
1021 while((req = elv_next_request(q)) != NULL){
1022 err = prepare_request(req, &io_req);
1023 if(!err){
1024 do_io(&io_req);
1025 __ubd_finish(req, io_req.error);
1026 }
1027 }
1028 }
1029 else {
1030 if(do_ubd || (req = elv_next_request(q)) == NULL)
1031 return;
1032 err = prepare_request(req, &io_req);
1033 if(!err){
1034 do_ubd = ubd_handler;
1035 n = os_write_file(thread_fd, (char *) &io_req,
1036 sizeof(io_req));
1037 if(n != sizeof(io_req))
1038 printk("write to io thread failed, "
1039 "errno = %d\n", -n);
1040 }
1041 }
1042}
1043
1044static int ubd_ioctl(struct inode * inode, struct file * file,
1045 unsigned int cmd, unsigned long arg)
1046{
1047 struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
1048 struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1049 struct hd_driveid ubd_id = {
1050 .cyls = 0,
1051 .heads = 128,
1052 .sectors = 32,
1053 };
1054
1055 switch (cmd) {
1056 struct hd_geometry g;
1057 struct cdrom_volctrl volume;
1058 case HDIO_GETGEO:
1059 if(!loc) return(-EINVAL);
1060 g.heads = 128;
1061 g.sectors = 32;
1062 g.cylinders = dev->size / (128 * 32 * 512);
1063 g.start = get_start_sect(inode->i_bdev);
1064 return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
1065
1066 case HDIO_GET_IDENTITY:
1067 ubd_id.cyls = dev->size / (128 * 32 * 512);
1068 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1069 sizeof(ubd_id)))
1070 return(-EFAULT);
1071 return(0);
1072
1073 case CDROMVOLREAD:
1074 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1075 return(-EFAULT);
1076 volume.channel0 = 255;
1077 volume.channel1 = 255;
1078 volume.channel2 = 255;
1079 volume.channel3 = 255;
1080 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1081 return(-EFAULT);
1082 return(0);
1083 }
1084 return(-EINVAL);
1085}
1086
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
1088{
1089 struct uml_stat buf1, buf2;
1090 int err;
1091
1092 if(from_cmdline == NULL) return(1);
1093 if(!strcmp(from_cmdline, from_cow)) return(1);
1094
1095 err = os_stat_file(from_cmdline, &buf1);
1096 if(err < 0){
1097 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1098 return(1);
1099 }
1100 err = os_stat_file(from_cow, &buf2);
1101 if(err < 0){
1102 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1103 return(1);
1104 }
1105 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1106 return(1);
1107
1108 printk("Backing file mismatch - \"%s\" requested,\n"
1109 "\"%s\" specified in COW header of \"%s\"\n",
1110 from_cmdline, from_cow, cow);
1111 return(0);
1112}
1113
1114static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1115{
1116 unsigned long modtime;
1117 long long actual;
1118 int err;
1119
1120 err = os_file_modtime(file, &modtime);
1121 if(err < 0){
1122 printk("Failed to get modification time of backing file "
1123 "\"%s\", err = %d\n", file, -err);
1124 return(err);
1125 }
1126
1127 err = os_file_size(file, &actual);
1128 if(err < 0){
1129 printk("Failed to get size of backing file \"%s\", "
1130 "err = %d\n", file, -err);
1131 return(err);
1132 }
1133
1134 if(actual != size){
1135 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1136 * the typecast.*/
1137 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1138 "file\n", (unsigned long long) size, actual);
1139 return(-EINVAL);
1140 }
1141 if(modtime != mtime){
1142 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1143 "file\n", mtime, modtime);
1144 return(-EINVAL);
1145 }
1146 return(0);
1147}
1148
/*
 * Read `len` bytes of COW bitmap from `fd`, starting at file offset
 * `offset`, into `buf`.
 *
 * Returns 0 when the seek and the read both report no error, otherwise
 * the negative error from whichever failed.  The number of bytes actually
 * read is not checked.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret;

	ret = os_seek_file(fd, offset);
	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1163
1164int open_ubd_file(char *file, struct openflags *openflags,
1165 char **backing_file_out, int *bitmap_offset_out,
1166 unsigned long *bitmap_len_out, int *data_offset_out,
1167 int *create_cow_out)
1168{
1169 time_t mtime;
1170 unsigned long long size;
1171 __u32 version, align;
1172 char *backing_file;
1173 int fd, err, sectorsize, same, mode = 0644;
1174
1175 fd = os_open_file(file, *openflags, mode);
1176 if(fd < 0){
1177 if((fd == -ENOENT) && (create_cow_out != NULL))
1178 *create_cow_out = 1;
1179 if(!openflags->w ||
1180 ((fd != -EROFS) && (fd != -EACCES))) return(fd);
1181 openflags->w = 0;
1182 fd = os_open_file(file, *openflags, mode);
1183 if(fd < 0)
1184 return(fd);
1185 }
1186
1187 err = os_lock_file(fd, openflags->w);
1188 if(err < 0){
1189 printk("Failed to lock '%s', err = %d\n", file, -err);
1190 goto out_close;
1191 }
1192
1193 if(backing_file_out == NULL) return(fd);
1194
1195 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1196 &size, &sectorsize, &align, bitmap_offset_out);
1197 if(err && (*backing_file_out != NULL)){
1198 printk("Failed to read COW header from COW file \"%s\", "
1199 "errno = %d\n", file, -err);
1200 goto out_close;
1201 }
1202 if(err) return(fd);
1203
1204 if(backing_file_out == NULL) return(fd);
1205
1206 same = same_backing_files(*backing_file_out, backing_file, file);
1207
1208 if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
1209 printk("Switching backing file to '%s'\n", *backing_file_out);
1210 err = write_cow_header(file, fd, *backing_file_out,
1211 sectorsize, align, &size);
1212 if(err){
1213 printk("Switch failed, errno = %d\n", -err);
1214 return(err);
1215 }
1216 }
1217 else {
1218 *backing_file_out = backing_file;
1219 err = backing_file_mismatch(*backing_file_out, size, mtime);
1220 if(err) goto out_close;
1221 }
1222
1223 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1224 bitmap_len_out, data_offset_out);
1225
1226 return(fd);
1227 out_close:
1228 os_close_file(fd);
1229 return(err);
1230}
1231
1232int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1233 int sectorsize, int alignment, int *bitmap_offset_out,
1234 unsigned long *bitmap_len_out, int *data_offset_out)
1235{
1236 int err, fd;
1237
1238 flags.c = 1;
1239 fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
1240 if(fd < 0){
1241 err = fd;
1242 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1243 -err);
1244 goto out;
1245 }
1246
1247 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1248 bitmap_offset_out, bitmap_len_out,
1249 data_offset_out);
1250 if(!err)
1251 return(fd);
1252 os_close_file(fd);
1253 out:
1254 return(err);
1255}
1256
1257static int update_bitmap(struct io_thread_req *req)
1258{
1259 int n;
1260
1261 if(req->cow_offset == -1)
1262 return(0);
1263
1264 n = os_seek_file(req->fds[1], req->cow_offset);
1265 if(n < 0){
1266 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1267 return(1);
1268 }
1269
1270 n = os_write_file(req->fds[1], &req->bitmap_words,
1271 sizeof(req->bitmap_words));
1272 if(n != sizeof(req->bitmap_words)){
1273 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1274 req->fds[1]);
1275 return(1);
1276 }
1277
1278 return(0);
1279}
1280
1281void do_io(struct io_thread_req *req)
1282{
1283 char *buf;
1284 unsigned long len;
1285 int n, nsectors, start, end, bit;
1286 int err;
1287 __u64 off;
1288
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289 nsectors = req->length / req->sectorsize;
1290 start = 0;
1291 do {
1292 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1293 end = start;
1294 while((end < nsectors) &&
1295 (ubd_test_bit(end, (unsigned char *)
1296 &req->sector_mask) == bit))
1297 end++;
1298
1299 off = req->offset + req->offsets[bit] +
1300 start * req->sectorsize;
1301 len = (end - start) * req->sectorsize;
1302 buf = &req->buffer[start * req->sectorsize];
1303
1304 err = os_seek_file(req->fds[bit], off);
1305 if(err < 0){
1306 printk("do_io - lseek failed : err = %d\n", -err);
1307 req->error = 1;
1308 return;
1309 }
1310 if(req->op == UBD_READ){
1311 n = 0;
1312 do {
1313 buf = &buf[n];
1314 len -= n;
1315 n = os_read_file(req->fds[bit], buf, len);
1316 if (n < 0) {
1317 printk("do_io - read failed, err = %d "
1318 "fd = %d\n", -n, req->fds[bit]);
1319 req->error = 1;
1320 return;
1321 }
1322 } while((n < len) && (n != 0));
1323 if (n < len) memset(&buf[n], 0, len - n);
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -07001324 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325 n = os_write_file(req->fds[bit], buf, len);
1326 if(n != len){
1327 printk("do_io - write failed err = %d "
1328 "fd = %d\n", -n, req->fds[bit]);
1329 req->error = 1;
1330 return;
1331 }
1332 }
1333
1334 start = end;
1335 } while(start < nsectors);
1336
1337 req->error = update_bitmap(req);
1338}
1339
/*
 * Descriptor the io thread reads requests from and writes completed
 * requests back to.  It is only assigned in start_io_thread, and that is
 * serialized because its sole caller is ubd_init, an initcall.
 */
int kernel_fd = -1;

/*
 * Count of requests the io thread has picked up so far.  Only the io
 * thread itself modifies it.
 */
int io_count = 0;
1347
1348int io_thread(void *arg)
1349{
1350 struct io_thread_req req;
1351 int n;
1352
1353 ignore_sigwinch_sig();
1354 while(1){
1355 n = os_read_file(kernel_fd, &req, sizeof(req));
1356 if(n != sizeof(req)){
1357 if(n < 0)
1358 printk("io_thread - read failed, fd = %d, "
1359 "err = %d\n", kernel_fd, -n);
1360 else {
1361 printk("io_thread - short read, fd = %d, "
1362 "length = %d\n", kernel_fd, n);
1363 }
1364 continue;
1365 }
1366 io_count++;
1367 do_io(&req);
1368 n = os_write_file(kernel_fd, &req, sizeof(req));
1369 if(n != sizeof(req))
1370 printk("io_thread - write failed, fd = %d, err = %d\n",
1371 kernel_fd, -n);
1372 }
1373}
1374
1375/*
1376 * Overrides for Emacs so that we follow Linus's tabbing style.
1377 * Emacs will notice this stuff at the end of the file and automatically
1378 * adjust the settings for this buffer only. This must remain at the end
1379 * of the file.
1380 * ---------------------------------------------------------------------------
1381 * Local variables:
1382 * c-file-style: "linux"
1383 * End:
1384 */