/*
 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 * Licensed under the GPL
 */
5
/* 2001-09-28...2002-04-17
 * Partition stuff by James_McMechan@hotmail.com
 * Old-style ubd is selected by setting UBD_SHIFT to 0.
 * 2002-09-27...2002-10-18 massive tinkering for 2.5;
 * partitions have changed in 2.5.
 * 2003-01-29 more tinkering for 2.5.59-1.
 * This should now address the sysfs problems and has
 * the symlink for devfs to allow for booting with
 * the common /dev/ubd/discX/... names rather than
 * only /dev/ubdN/discN. This version also has lots of
 * cleanups preparing for ubd-many.
 * James McMechan
 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
23#include "linux/config.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/devfs_fs_kernel.h"
29#include "linux/cdrom.h"
30#include "linux/proc_fs.h"
31#include "linux/ctype.h"
32#include "linux/capability.h"
33#include "linux/mm.h"
34#include "linux/vmalloc.h"
35#include "linux/blkpg.h"
36#include "linux/genhd.h"
37#include "linux/spinlock.h"
38#include "asm/segment.h"
39#include "asm/uaccess.h"
40#include "asm/irq.h"
41#include "asm/types.h"
42#include "asm/tlbflush.h"
43#include "user_util.h"
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include "os.h"
53#include "mem.h"
54#include "mem_kern.h"
55#include "cow.h"
56
/* Direction of a request handed to the I/O thread. */
enum ubd_req { UBD_READ, UBD_WRITE };

/*
 * One I/O request as passed to do_io() or written to the I/O thread.
 * prepare_request() fills it in; ubd_handler() reads it back and looks
 * at 'error'.
 */
struct io_thread_req {
	enum ubd_req op;		/* UBD_READ or UBD_WRITE */
	int fds[2];			/* [0]: backing fd if COW is used, else
					 * the device fd; [1]: the device fd */
	unsigned long offsets[2];	/* [0]: 0; [1]: COW data offset */
	unsigned long long offset;	/* byte offset of the I/O (sector << 9) */
	unsigned long length;		/* byte length of the I/O */
	char *buffer;			/* data buffer taken from the request */
	int sectorsize;			/* set to 1 << 9 by prepare_request() */
	unsigned long sector_mask;	/* one bit per sector of this request,
					 * set up by cowify_req() */
	unsigned long long cow_offset;	/* -1, or the file offset of the bitmap
					 * words below (see cowify_bitmap()) */
	unsigned long bitmap_words[2];	/* copies of the updated bitmap words */
	int error;			/* cleared on submit; result on return */
};
72
73extern int open_ubd_file(char *file, struct openflags *openflags,
74 char **backing_file_out, int *bitmap_offset_out,
75 unsigned long *bitmap_len_out, int *data_offset_out,
76 int *create_cow_out);
77extern int create_cow_file(char *cow_file, char *backing_file,
78 struct openflags flags, int sectorsize,
79 int alignment, int *bitmap_offset_out,
80 unsigned long *bitmap_len_out,
81 int *data_offset_out);
82extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
83extern void do_io(struct io_thread_req *req);
84
/*
 * Test bit number 'bit' in the byte array 'data'.  Bit 0 is the least
 * significant bit of data[0].  Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	return (data[index] & (1 << shift)) != 0;
}
95
/*
 * Set bit number 'bit' in the byte array 'data'.  Bit 0 is the least
 * significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	int bits_per_elem = sizeof(data[0]) * 8;
	__u64 index = bit / bits_per_elem;
	int shift = bit % bits_per_elem;

	data[index] |= (1 << shift);
}
106/*End stuff from ubd_user.h*/
107
108#define DRIVER_NAME "uml-blkdev"
109
110static DEFINE_SPINLOCK(ubd_io_lock);
111static DEFINE_SPINLOCK(ubd_lock);
112
113static void (*do_ubd)(void);
114
115static int ubd_open(struct inode * inode, struct file * filp);
116static int ubd_release(struct inode * inode, struct file * file);
117static int ubd_ioctl(struct inode * inode, struct file * file,
118 unsigned int cmd, unsigned long arg);
119
120#define MAX_DEV (8)
121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122static struct block_device_operations ubd_blops = {
123 .owner = THIS_MODULE,
124 .open = ubd_open,
125 .release = ubd_release,
126 .ioctl = ubd_ioctl,
127};
128
129/* Protected by the queue_lock */
130static request_queue_t *ubd_queue;
131
132/* Protected by ubd_lock */
133static int fake_major = MAJOR_NR;
134
135static struct gendisk *ubd_gendisk[MAX_DEV];
136static struct gendisk *fake_gendisk[MAX_DEV];
137
138#ifdef CONFIG_BLK_DEV_UBD_SYNC
139#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
140 .cl = 1 })
141#else
142#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
143 .cl = 1 })
144#endif
145
146/* Not protected - changed only in ubd_setup_common and then only to
147 * to enable O_SYNC.
148 */
149static struct openflags global_openflags = OPEN_FLAGS;
150
/* Copy-on-write state of a ubd device. */
struct cow {
	/* This is the backing file, actually */
	char *file;
	int fd;				/* fd of the backing file */
	unsigned long *bitmap;		/* in-memory copy of the COW bitmap,
					 * vmalloc'ed in ubd_open_dev() */
	unsigned long bitmap_len;	/* size passed to vmalloc for 'bitmap' */
	int bitmap_offset;		/* offset of the bitmap in the COW file */
	int data_offset;		/* offset of the data in the COW file */
};
160
161struct ubd {
162 char *file;
163 int count;
164 int fd;
165 __u64 size;
166 struct openflags boot_openflags;
167 struct openflags openflags;
168 int no_cow;
169 struct cow cow;
170 struct platform_device pdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171};
172
173#define DEFAULT_COW { \
174 .file = NULL, \
175 .fd = -1, \
176 .bitmap = NULL, \
177 .bitmap_offset = 0, \
178 .data_offset = 0, \
179}
180
181#define DEFAULT_UBD { \
182 .file = NULL, \
183 .count = 0, \
184 .fd = -1, \
185 .size = -1, \
186 .boot_openflags = OPEN_FLAGS, \
187 .openflags = OPEN_FLAGS, \
188 .no_cow = 0, \
189 .cow = DEFAULT_COW, \
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190}
191
192struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
193
194static int ubd0_init(void)
195{
196 struct ubd *dev = &ubd_dev[0];
197
198 if(dev->file == NULL)
199 dev->file = "root_fs";
200 return(0);
201}
202
203__initcall(ubd0_init);
204
205/* Only changed by fake_ide_setup which is a setup */
206static int fake_ide = 0;
207static struct proc_dir_entry *proc_ide_root = NULL;
208static struct proc_dir_entry *proc_ide = NULL;
209
210static void make_proc_ide(void)
211{
212 proc_ide_root = proc_mkdir("ide", NULL);
213 proc_ide = proc_mkdir("ide0", proc_ide_root);
214}
215
/*
 * read_proc handler for the "media" entry: the content is always "disk\n".
 *
 * The string is copied to 'page'.  The return value is the number of
 * bytes available from offset 'off', capped at 'count'.  *eof is set
 * when fewer than 'count' bytes remain; when nothing remains, 0 is
 * returned and *start is left untouched.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	int remaining;

	strcpy(page, "disk\n");
	remaining = strlen("disk\n");
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	} else {
		*eof = 1;
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
232
233static void make_ide_entries(char *dev_name)
234{
235 struct proc_dir_entry *dir, *ent;
236 char name[64];
237
238 if(proc_ide_root == NULL) make_proc_ide();
239
240 dir = proc_mkdir(dev_name, proc_ide);
241 if(!dir) return;
242
243 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
244 if(!ent) return;
245 ent->nlink = 1;
246 ent->data = NULL;
247 ent->read_proc = proc_ide_read_media;
248 ent->write_proc = NULL;
249 sprintf(name,"ide0/%s", dev_name);
250 proc_symlink(dev_name, proc_ide_root, name);
251}
252
253static int fake_ide_setup(char *str)
254{
255 fake_ide = 1;
256 return(1);
257}
258
259__setup("fake_ide", fake_ide_setup);
260
261__uml_help(fake_ide_setup,
262"fake_ide\n"
263" Create ide0 entries that map onto ubd devices.\n\n"
264);
265
/*
 * Parse a unit designator at *ptr: either a number (as understood by
 * simple_strtoul with base 0) or a single letter 'a'..'h' meaning 0..7.
 *
 * On success *ptr is advanced past the designator and the unit is
 * returned; otherwise -1 is returned and *ptr is unchanged.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'h'))
		return -1;

	unit = *cur - 'a';
	*ptr = cur + 1;
	return unit;
}
284
285static int ubd_setup_common(char *str, int *index_out)
286{
287 struct ubd *dev;
288 struct openflags flags = global_openflags;
289 char *backing_file;
290 int n, err, i;
291
292 if(index_out) *index_out = -1;
293 n = *str;
294 if(n == '='){
295 char *end;
296 int major;
297
298 str++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 if(!strcmp(str, "sync")){
300 global_openflags = of_sync(global_openflags);
301 return(0);
302 }
303 major = simple_strtoul(str, &end, 0);
304 if((*end != '\0') || (end == str)){
305 printk(KERN_ERR
306 "ubd_setup : didn't parse major number\n");
307 return(1);
308 }
309
310 err = 1;
311 spin_lock(&ubd_lock);
312 if(fake_major != MAJOR_NR){
313 printk(KERN_ERR "Can't assign a fake major twice\n");
314 goto out1;
315 }
316
317 fake_major = major;
318
319 printk(KERN_INFO "Setting extra ubd major number to %d\n",
320 major);
321 err = 0;
322 out1:
323 spin_unlock(&ubd_lock);
324 return(err);
325 }
326
327 n = parse_unit(&str);
328 if(n < 0){
329 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
330 "'%s'\n", str);
331 return(1);
332 }
333 if(n >= MAX_DEV){
334 printk(KERN_ERR "ubd_setup : index %d out of range "
335 "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
336 return(1);
337 }
338
339 err = 1;
340 spin_lock(&ubd_lock);
341
342 dev = &ubd_dev[n];
343 if(dev->file != NULL){
344 printk(KERN_ERR "ubd_setup : device already configured\n");
345 goto out;
346 }
347
348 if (index_out)
349 *index_out = n;
350
351 for (i = 0; i < 4; i++) {
352 switch (*str) {
353 case 'r':
354 flags.w = 0;
355 break;
356 case 's':
357 flags.s = 1;
358 break;
359 case 'd':
360 dev->no_cow = 1;
361 break;
362 case '=':
363 str++;
364 goto break_loop;
365 default:
366 printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
367 goto out;
368 }
369 str++;
370 }
371
372 if (*str == '=')
373 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
374 else
375 printk(KERN_ERR "ubd_setup : Expected '='\n");
376 goto out;
377
378break_loop:
379 err = 0;
380 backing_file = strchr(str, ',');
381
382 if (!backing_file) {
383 backing_file = strchr(str, ':');
384 }
385
386 if(backing_file){
387 if(dev->no_cow)
388 printk(KERN_ERR "Can't specify both 'd' and a "
389 "cow file\n");
390 else {
391 *backing_file = '\0';
392 backing_file++;
393 }
394 }
395 dev->file = str;
396 dev->cow.file = backing_file;
397 dev->boot_openflags = flags;
398out:
399 spin_unlock(&ubd_lock);
400 return(err);
401}
402
403static int ubd_setup(char *str)
404{
405 ubd_setup_common(str, NULL);
406 return(1);
407}
408
409__setup("ubd", ubd_setup);
410__uml_help(ubd_setup,
411"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
412" This is used to associate a device with a file in the underlying\n"
413" filesystem. When specifying two filenames, the first one is the\n"
414" COW name and the second is the backing file name. As separator you can\n"
415" use either a ':' or a ',': the first one allows writing things like;\n"
416" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
417" while with a ',' the shell would not expand the 2nd '~'.\n"
418" When using only one filename, UML will detect whether to thread it like\n"
419" a COW file or a backing file. To override this detection, add the 'd'\n"
420" flag:\n"
421" ubd0d=BackingFile\n"
422" Usually, there is a filesystem in the file, but \n"
423" that's not required. Swap devices containing swap files can be\n"
424" specified like this. Also, a file which doesn't contain a\n"
425" filesystem can have its contents read in the virtual \n"
426" machine by running 'dd' on the device. <n> must be in the range\n"
427" 0 to 7. Appending an 'r' to the number will cause that device\n"
428" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
429" an 's' will cause data to be written to disk on the host immediately.\n\n"
430);
431
432static int udb_setup(char *str)
433{
434 printk("udb%s specified on command line is almost certainly a ubd -> "
435 "udb TYPO\n", str);
436 return(1);
437}
438
439__setup("udb", udb_setup);
440__uml_help(udb_setup,
441"udb\n"
Jeff Dike0894e272005-05-28 15:51:55 -0700442" This option is here solely to catch ubd -> udb typos, which can be\n"
443" to impossible to catch visually unless you specifically look for\n"
444" them. The only result of any option starting with 'udb' is an error\n"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445" in the boot output.\n\n"
446);
447
448static int fakehd_set = 0;
449static int fakehd(char *str)
450{
451 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
452 fakehd_set = 1;
453 return 1;
454}
455
456__setup("fakehd", fakehd);
457__uml_help(fakehd,
458"fakehd\n"
459" Change the ubd device name to \"hd\".\n\n"
460);
461
462static void do_ubd_request(request_queue_t * q);
463
464/* Only changed by ubd_init, which is an initcall. */
465int thread_fd = -1;
466
467/* Changed by ubd_handler, which is serialized because interrupts only
468 * happen on CPU 0.
469 */
470int intr_count = 0;
471
472/* call ubd_finish if you need to serialize */
473static void __ubd_finish(struct request *req, int error)
474{
475 int nsect;
476
477 if(error){
478 end_request(req, 0);
479 return;
480 }
481 nsect = req->current_nr_sectors;
482 req->sector += nsect;
483 req->buffer += nsect << 9;
484 req->errors = 0;
485 req->nr_sectors -= nsect;
486 req->current_nr_sectors = 0;
487 end_request(req, 1);
488}
489
490static inline void ubd_finish(struct request *req, int error)
491{
492 spin_lock(&ubd_io_lock);
493 __ubd_finish(req, error);
494 spin_unlock(&ubd_io_lock);
495}
496
497/* Called without ubd_io_lock held */
498static void ubd_handler(void)
499{
500 struct io_thread_req req;
501 struct request *rq = elv_next_request(ubd_queue);
Jeff Dike7b9014c2005-05-20 13:59:11 -0700502 int n;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503
504 do_ubd = NULL;
505 intr_count++;
506 n = os_read_file(thread_fd, &req, sizeof(req));
507 if(n != sizeof(req)){
508 printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
509 "err = %d\n", os_getpid(), -n);
510 spin_lock(&ubd_io_lock);
511 end_request(rq, 0);
512 spin_unlock(&ubd_io_lock);
513 return;
514 }
515
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 ubd_finish(rq, req.error);
517 reactivate_fd(thread_fd, UBD_IRQ);
518 do_ubd_request(ubd_queue);
519}
520
521static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
522{
523 ubd_handler();
524 return(IRQ_HANDLED);
525}
526
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one is recorded in io_pid. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
537
538static int ubd_file_size(struct ubd *dev, __u64 *size_out)
539{
540 char *file;
541
542 file = dev->cow.file ? dev->cow.file : dev->file;
543 return(os_file_size(file, size_out));
544}
545
546static void ubd_close(struct ubd *dev)
547{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 os_close_file(dev->fd);
549 if(dev->cow.file == NULL)
550 return;
551
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 os_close_file(dev->cow.fd);
553 vfree(dev->cow.bitmap);
554 dev->cow.bitmap = NULL;
555}
556
557static int ubd_open_dev(struct ubd *dev)
558{
559 struct openflags flags;
560 char **back_ptr;
561 int err, create_cow, *create_ptr;
562
563 dev->openflags = dev->boot_openflags;
564 create_cow = 0;
565 create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
566 back_ptr = dev->no_cow ? NULL : &dev->cow.file;
567 dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
568 &dev->cow.bitmap_offset, &dev->cow.bitmap_len,
569 &dev->cow.data_offset, create_ptr);
570
571 if((dev->fd == -ENOENT) && create_cow){
572 dev->fd = create_cow_file(dev->file, dev->cow.file,
573 dev->openflags, 1 << 9, PAGE_SIZE,
574 &dev->cow.bitmap_offset,
575 &dev->cow.bitmap_len,
576 &dev->cow.data_offset);
577 if(dev->fd >= 0){
578 printk(KERN_INFO "Creating \"%s\" as COW file for "
579 "\"%s\"\n", dev->file, dev->cow.file);
580 }
581 }
582
583 if(dev->fd < 0){
584 printk("Failed to open '%s', errno = %d\n", dev->file,
585 -dev->fd);
586 return(dev->fd);
587 }
588
589 if(dev->cow.file != NULL){
590 err = -ENOMEM;
591 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
592 if(dev->cow.bitmap == NULL){
593 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
594 goto error;
595 }
596 flush_tlb_kernel_vm();
597
598 err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
599 dev->cow.bitmap_offset,
600 dev->cow.bitmap_len);
601 if(err < 0)
602 goto error;
603
604 flags = dev->openflags;
605 flags.w = 0;
606 err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL,
607 NULL, NULL);
608 if(err < 0) goto error;
609 dev->cow.fd = err;
610 }
611 return(0);
612 error:
613 os_close_file(dev->fd);
614 return(err);
615}
616
617static int ubd_new_disk(int major, u64 size, int unit,
618 struct gendisk **disk_out)
619
620{
621 struct gendisk *disk;
622 char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
623 int err;
624
625 disk = alloc_disk(1 << UBD_SHIFT);
626 if(disk == NULL)
627 return(-ENOMEM);
628
629 disk->major = major;
630 disk->first_minor = unit << UBD_SHIFT;
631 disk->fops = &ubd_blops;
632 set_capacity(disk, size / 512);
633 if(major == MAJOR_NR){
634 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
635 sprintf(disk->devfs_name, "ubd/disc%d", unit);
636 sprintf(from, "ubd/%d", unit);
637 sprintf(to, "disc%d/disc", unit);
638 err = devfs_mk_symlink(from, to);
639 if(err)
640 printk("ubd_new_disk failed to make link from %s to "
641 "%s, error = %d\n", from, to, err);
642 }
643 else {
644 sprintf(disk->disk_name, "ubd_fake%d", unit);
645 sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
646 }
647
648 /* sysfs register (not for ide fake devices) */
649 if (major == MAJOR_NR) {
650 ubd_dev[unit].pdev.id = unit;
651 ubd_dev[unit].pdev.name = DRIVER_NAME;
652 platform_device_register(&ubd_dev[unit].pdev);
653 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
654 }
655
656 disk->private_data = &ubd_dev[unit];
657 disk->queue = ubd_queue;
658 add_disk(disk);
659
660 *disk_out = disk;
661 return 0;
662}
663
664#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
665
666static int ubd_add(int n)
667{
668 struct ubd *dev = &ubd_dev[n];
669 int err;
670
671 if(dev->file == NULL)
672 return(-ENODEV);
673
674 if (ubd_open_dev(dev))
675 return(-ENODEV);
676
677 err = ubd_file_size(dev, &dev->size);
678 if(err < 0)
679 return(err);
680
681 dev->size = ROUND_BLOCK(dev->size);
682
683 err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
684 if(err)
685 return(err);
686
687 if(fake_major != MAJOR_NR)
688 ubd_new_disk(fake_major, dev->size, n,
689 &fake_gendisk[n]);
690
691 /* perhaps this should also be under the "if (fake_major)" above */
692 /* using the fake_disk->disk_name and also the fakehd_set name */
693 if (fake_ide)
694 make_ide_entries(ubd_gendisk[n]->disk_name);
695
696 ubd_close(dev);
697 return 0;
698}
699
700static int ubd_config(char *str)
701{
702 int n, err;
703
704 str = uml_strdup(str);
705 if(str == NULL){
706 printk(KERN_ERR "ubd_config failed to strdup string\n");
707 return(1);
708 }
709 err = ubd_setup_common(str, &n);
710 if(err){
711 kfree(str);
712 return(-1);
713 }
714 if(n == -1) return(0);
715
716 spin_lock(&ubd_lock);
717 err = ubd_add(n);
718 if(err)
719 ubd_dev[n].file = NULL;
720 spin_unlock(&ubd_lock);
721
722 return(err);
723}
724
725static int ubd_get_config(char *name, char *str, int size, char **error_out)
726{
727 struct ubd *dev;
728 int n, len = 0;
729
730 n = parse_unit(&name);
731 if((n >= MAX_DEV) || (n < 0)){
732 *error_out = "ubd_get_config : device number out of range";
733 return(-1);
734 }
735
736 dev = &ubd_dev[n];
737 spin_lock(&ubd_lock);
738
739 if(dev->file == NULL){
740 CONFIG_CHUNK(str, size, len, "", 1);
741 goto out;
742 }
743
744 CONFIG_CHUNK(str, size, len, dev->file, 0);
745
746 if(dev->cow.file != NULL){
747 CONFIG_CHUNK(str, size, len, ",", 0);
748 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
749 }
750 else CONFIG_CHUNK(str, size, len, "", 1);
751
752 out:
753 spin_unlock(&ubd_lock);
754 return(len);
755}
756
Jeff Dike29d56cf2005-06-25 14:55:25 -0700757static int ubd_id(char **str, int *start_out, int *end_out)
758{
759 int n;
760
761 n = parse_unit(str);
762 *start_out = 0;
763 *end_out = MAX_DEV - 1;
764 return n;
765}
766
767static int ubd_remove(int n)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768{
769 struct ubd *dev;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700770 int err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771
Jeff Dike29d56cf2005-06-25 14:55:25 -0700772 spin_lock(&ubd_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773
774 if(ubd_gendisk[n] == NULL)
775 goto out;
776
Jeff Dike29d56cf2005-06-25 14:55:25 -0700777 dev = &ubd_dev[n];
778
779 if(dev->file == NULL)
780 goto out;
781
782 /* you cannot remove a open disk */
783 err = -EBUSY;
784 if(dev->count > 0)
785 goto out;
786
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 del_gendisk(ubd_gendisk[n]);
788 put_disk(ubd_gendisk[n]);
789 ubd_gendisk[n] = NULL;
790
791 if(fake_gendisk[n] != NULL){
792 del_gendisk(fake_gendisk[n]);
793 put_disk(fake_gendisk[n]);
794 fake_gendisk[n] = NULL;
795 }
796
797 platform_device_unregister(&dev->pdev);
798 *dev = ((struct ubd) DEFAULT_UBD);
799 err = 0;
Jeff Dike29d56cf2005-06-25 14:55:25 -0700800out:
801 spin_unlock(&ubd_lock);
802 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803}
804
805static struct mc_device ubd_mc = {
806 .name = "ubd",
807 .config = ubd_config,
808 .get_config = ubd_get_config,
Jeff Dike29d56cf2005-06-25 14:55:25 -0700809 .id = ubd_id,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 .remove = ubd_remove,
811};
812
813static int ubd_mc_init(void)
814{
815 mconsole_register_dev(&ubd_mc);
816 return 0;
817}
818
819__initcall(ubd_mc_init);
820
821static struct device_driver ubd_driver = {
822 .name = DRIVER_NAME,
823 .bus = &platform_bus_type,
824};
825
826int ubd_init(void)
827{
828 int i;
829
830 devfs_mk_dir("ubd");
831 if (register_blkdev(MAJOR_NR, "ubd"))
832 return -1;
833
834 ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
835 if (!ubd_queue) {
836 unregister_blkdev(MAJOR_NR, "ubd");
837 return -1;
838 }
839
840 if (fake_major != MAJOR_NR) {
841 char name[sizeof("ubd_nnn\0")];
842
843 snprintf(name, sizeof(name), "ubd_%d", fake_major);
844 devfs_mk_dir(name);
845 if (register_blkdev(fake_major, "ubd"))
846 return -1;
847 }
848 driver_register(&ubd_driver);
849 for (i = 0; i < MAX_DEV; i++)
850 ubd_add(i);
851 return 0;
852}
853
854late_initcall(ubd_init);
855
856int ubd_driver_init(void){
857 unsigned long stack;
858 int err;
859
860 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
861 if(global_openflags.s){
862 printk(KERN_INFO "ubd: Synchronous mode\n");
863 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
864 * enough. So use anyway the io thread. */
865 }
866 stack = alloc_stack(0, 0);
867 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
868 &thread_fd);
869 if(io_pid < 0){
870 printk(KERN_ERR
871 "ubd : Failed to start I/O thread (errno = %d) - "
872 "falling back to synchronous I/O\n", -io_pid);
873 io_pid = -1;
874 return(0);
875 }
876 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
877 SA_INTERRUPT, "ubd", ubd_dev);
878 if(err != 0)
879 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
880 return(err);
881}
882
883device_initcall(ubd_driver_init);
884
885static int ubd_open(struct inode *inode, struct file *filp)
886{
887 struct gendisk *disk = inode->i_bdev->bd_disk;
888 struct ubd *dev = disk->private_data;
889 int err = 0;
890
891 if(dev->count == 0){
892 err = ubd_open_dev(dev);
893 if(err){
894 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
895 disk->disk_name, dev->file, -err);
896 goto out;
897 }
898 }
899 dev->count++;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700900 set_disk_ro(disk, !dev->openflags.w);
901
902 /* This should no more be needed. And it didn't work anyway to exclude
903 * read-write remounting of filesystems.*/
904 /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 if(--dev->count == 0) ubd_close(dev);
906 err = -EROFS;
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700907 }*/
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 out:
909 return(err);
910}
911
912static int ubd_release(struct inode * inode, struct file * file)
913{
914 struct gendisk *disk = inode->i_bdev->bd_disk;
915 struct ubd *dev = disk->private_data;
916
917 if(--dev->count == 0)
918 ubd_close(dev);
919 return(0);
920}
921
922static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
923 __u64 *cow_offset, unsigned long *bitmap,
924 __u64 bitmap_offset, unsigned long *bitmap_words,
925 __u64 bitmap_len)
926{
927 __u64 sector = io_offset >> 9;
928 int i, update_bitmap = 0;
929
930 for(i = 0; i < length >> 9; i++){
931 if(cow_mask != NULL)
932 ubd_set_bit(i, (unsigned char *) cow_mask);
933 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
934 continue;
935
936 update_bitmap = 1;
937 ubd_set_bit(sector + i, (unsigned char *) bitmap);
938 }
939
940 if(!update_bitmap)
941 return;
942
943 *cow_offset = sector / (sizeof(unsigned long) * 8);
944
945 /* This takes care of the case where we're exactly at the end of the
946 * device, and *cow_offset + 1 is off the end. So, just back it up
947 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
948 * for the original diagnosis.
949 */
950 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
951 sizeof(unsigned long) - 1))
952 (*cow_offset)--;
953
954 bitmap_words[0] = bitmap[*cow_offset];
955 bitmap_words[1] = bitmap[*cow_offset + 1];
956
957 *cow_offset *= sizeof(unsigned long);
958 *cow_offset += bitmap_offset;
959}
960
961static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
962 __u64 bitmap_offset, __u64 bitmap_len)
963{
964 __u64 sector = req->offset >> 9;
965 int i;
966
967 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
968 panic("Operation too long");
969
970 if(req->op == UBD_READ) {
971 for(i = 0; i < req->length >> 9; i++){
972 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
973 ubd_set_bit(i, (unsigned char *)
974 &req->sector_mask);
975 }
976 }
977 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
978 &req->cow_offset, bitmap, bitmap_offset,
979 req->bitmap_words, bitmap_len);
980}
981
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982/* Called with ubd_io_lock held */
983static int prepare_request(struct request *req, struct io_thread_req *io_req)
984{
985 struct gendisk *disk = req->rq_disk;
986 struct ubd *dev = disk->private_data;
987 __u64 offset;
Jeff Dike7b9014c2005-05-20 13:59:11 -0700988 int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989
990 if(req->rq_status == RQ_INACTIVE) return(1);
991
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -0700992 /* This should be impossible now */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
994 printk("Write attempted on readonly ubd device %s\n",
995 disk->disk_name);
996 end_request(req, 0);
997 return(1);
998 }
999
1000 offset = ((__u64) req->sector) << 9;
1001 len = req->current_nr_sectors << 9;
1002
1003 io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
1004 io_req->fds[1] = dev->fd;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 io_req->cow_offset = -1;
1006 io_req->offset = offset;
1007 io_req->length = len;
1008 io_req->error = 0;
1009 io_req->sector_mask = 0;
1010
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1012 io_req->offsets[0] = 0;
1013 io_req->offsets[1] = dev->cow.data_offset;
1014 io_req->buffer = req->buffer;
1015 io_req->sectorsize = 1 << 9;
1016
1017 if(dev->cow.file != NULL)
1018 cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
1019 dev->cow.bitmap_len);
1020
1021 return(0);
1022}
1023
1024/* Called with ubd_io_lock held */
1025static void do_ubd_request(request_queue_t *q)
1026{
1027 struct io_thread_req io_req;
1028 struct request *req;
1029 int err, n;
1030
1031 if(thread_fd == -1){
1032 while((req = elv_next_request(q)) != NULL){
1033 err = prepare_request(req, &io_req);
1034 if(!err){
1035 do_io(&io_req);
1036 __ubd_finish(req, io_req.error);
1037 }
1038 }
1039 }
1040 else {
1041 if(do_ubd || (req = elv_next_request(q)) == NULL)
1042 return;
1043 err = prepare_request(req, &io_req);
1044 if(!err){
1045 do_ubd = ubd_handler;
1046 n = os_write_file(thread_fd, (char *) &io_req,
1047 sizeof(io_req));
1048 if(n != sizeof(io_req))
1049 printk("write to io thread failed, "
1050 "errno = %d\n", -n);
1051 }
1052 }
1053}
1054
1055static int ubd_ioctl(struct inode * inode, struct file * file,
1056 unsigned int cmd, unsigned long arg)
1057{
1058 struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
1059 struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1060 struct hd_driveid ubd_id = {
1061 .cyls = 0,
1062 .heads = 128,
1063 .sectors = 32,
1064 };
1065
1066 switch (cmd) {
1067 struct hd_geometry g;
1068 struct cdrom_volctrl volume;
1069 case HDIO_GETGEO:
1070 if(!loc) return(-EINVAL);
1071 g.heads = 128;
1072 g.sectors = 32;
1073 g.cylinders = dev->size / (128 * 32 * 512);
1074 g.start = get_start_sect(inode->i_bdev);
1075 return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
1076
1077 case HDIO_GET_IDENTITY:
1078 ubd_id.cyls = dev->size / (128 * 32 * 512);
1079 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1080 sizeof(ubd_id)))
1081 return(-EFAULT);
1082 return(0);
1083
1084 case CDROMVOLREAD:
1085 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1086 return(-EFAULT);
1087 volume.channel0 = 255;
1088 volume.channel1 = 255;
1089 volume.channel2 = 255;
1090 volume.channel3 = 255;
1091 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1092 return(-EFAULT);
1093 return(0);
1094 }
1095 return(-EINVAL);
1096}
1097
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
1099{
1100 struct uml_stat buf1, buf2;
1101 int err;
1102
1103 if(from_cmdline == NULL) return(1);
1104 if(!strcmp(from_cmdline, from_cow)) return(1);
1105
1106 err = os_stat_file(from_cmdline, &buf1);
1107 if(err < 0){
1108 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1109 return(1);
1110 }
1111 err = os_stat_file(from_cow, &buf2);
1112 if(err < 0){
1113 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1114 return(1);
1115 }
1116 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1117 return(1);
1118
1119 printk("Backing file mismatch - \"%s\" requested,\n"
1120 "\"%s\" specified in COW header of \"%s\"\n",
1121 from_cmdline, from_cow, cow);
1122 return(0);
1123}
1124
1125static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1126{
1127 unsigned long modtime;
1128 long long actual;
1129 int err;
1130
1131 err = os_file_modtime(file, &modtime);
1132 if(err < 0){
1133 printk("Failed to get modification time of backing file "
1134 "\"%s\", err = %d\n", file, -err);
1135 return(err);
1136 }
1137
1138 err = os_file_size(file, &actual);
1139 if(err < 0){
1140 printk("Failed to get size of backing file \"%s\", "
1141 "err = %d\n", file, -err);
1142 return(err);
1143 }
1144
1145 if(actual != size){
1146 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1147 * the typecast.*/
1148 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1149 "file\n", (unsigned long long) size, actual);
1150 return(-EINVAL);
1151 }
1152 if(modtime != mtime){
1153 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1154 "file\n", mtime, modtime);
1155 return(-EINVAL);
1156 }
1157 return(0);
1158}
1159
/*
 * Read 'len' bytes at 'offset' of 'fd' into 'buf'.
 * Returns 0 on success or the negative error from the seek or the read.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int err;

	err = os_seek_file(fd, offset);
	if (err >= 0)
		err = os_read_file(fd, buf, len);

	return (err < 0) ? err : 0;
}
1174
1175int open_ubd_file(char *file, struct openflags *openflags,
1176 char **backing_file_out, int *bitmap_offset_out,
1177 unsigned long *bitmap_len_out, int *data_offset_out,
1178 int *create_cow_out)
1179{
1180 time_t mtime;
1181 unsigned long long size;
1182 __u32 version, align;
1183 char *backing_file;
1184 int fd, err, sectorsize, same, mode = 0644;
1185
1186 fd = os_open_file(file, *openflags, mode);
1187 if(fd < 0){
1188 if((fd == -ENOENT) && (create_cow_out != NULL))
1189 *create_cow_out = 1;
1190 if(!openflags->w ||
1191 ((fd != -EROFS) && (fd != -EACCES))) return(fd);
1192 openflags->w = 0;
1193 fd = os_open_file(file, *openflags, mode);
1194 if(fd < 0)
1195 return(fd);
1196 }
1197
1198 err = os_lock_file(fd, openflags->w);
1199 if(err < 0){
1200 printk("Failed to lock '%s', err = %d\n", file, -err);
1201 goto out_close;
1202 }
1203
1204 if(backing_file_out == NULL) return(fd);
1205
1206 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1207 &size, &sectorsize, &align, bitmap_offset_out);
1208 if(err && (*backing_file_out != NULL)){
1209 printk("Failed to read COW header from COW file \"%s\", "
1210 "errno = %d\n", file, -err);
1211 goto out_close;
1212 }
1213 if(err) return(fd);
1214
1215 if(backing_file_out == NULL) return(fd);
1216
1217 same = same_backing_files(*backing_file_out, backing_file, file);
1218
1219 if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
1220 printk("Switching backing file to '%s'\n", *backing_file_out);
1221 err = write_cow_header(file, fd, *backing_file_out,
1222 sectorsize, align, &size);
1223 if(err){
1224 printk("Switch failed, errno = %d\n", -err);
1225 return(err);
1226 }
1227 }
1228 else {
1229 *backing_file_out = backing_file;
1230 err = backing_file_mismatch(*backing_file_out, size, mtime);
1231 if(err) goto out_close;
1232 }
1233
1234 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1235 bitmap_len_out, data_offset_out);
1236
1237 return(fd);
1238 out_close:
1239 os_close_file(fd);
1240 return(err);
1241}
1242
1243int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1244 int sectorsize, int alignment, int *bitmap_offset_out,
1245 unsigned long *bitmap_len_out, int *data_offset_out)
1246{
1247 int err, fd;
1248
1249 flags.c = 1;
1250 fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
1251 if(fd < 0){
1252 err = fd;
1253 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1254 -err);
1255 goto out;
1256 }
1257
1258 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1259 bitmap_offset_out, bitmap_len_out,
1260 data_offset_out);
1261 if(!err)
1262 return(fd);
1263 os_close_file(fd);
1264 out:
1265 return(err);
1266}
1267
1268static int update_bitmap(struct io_thread_req *req)
1269{
1270 int n;
1271
1272 if(req->cow_offset == -1)
1273 return(0);
1274
1275 n = os_seek_file(req->fds[1], req->cow_offset);
1276 if(n < 0){
1277 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1278 return(1);
1279 }
1280
1281 n = os_write_file(req->fds[1], &req->bitmap_words,
1282 sizeof(req->bitmap_words));
1283 if(n != sizeof(req->bitmap_words)){
1284 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1285 req->fds[1]);
1286 return(1);
1287 }
1288
1289 return(0);
1290}
1291
1292void do_io(struct io_thread_req *req)
1293{
1294 char *buf;
1295 unsigned long len;
1296 int n, nsectors, start, end, bit;
1297 int err;
1298 __u64 off;
1299
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 nsectors = req->length / req->sectorsize;
1301 start = 0;
1302 do {
1303 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1304 end = start;
1305 while((end < nsectors) &&
1306 (ubd_test_bit(end, (unsigned char *)
1307 &req->sector_mask) == bit))
1308 end++;
1309
1310 off = req->offset + req->offsets[bit] +
1311 start * req->sectorsize;
1312 len = (end - start) * req->sectorsize;
1313 buf = &req->buffer[start * req->sectorsize];
1314
1315 err = os_seek_file(req->fds[bit], off);
1316 if(err < 0){
1317 printk("do_io - lseek failed : err = %d\n", -err);
1318 req->error = 1;
1319 return;
1320 }
1321 if(req->op == UBD_READ){
1322 n = 0;
1323 do {
1324 buf = &buf[n];
1325 len -= n;
1326 n = os_read_file(req->fds[bit], buf, len);
1327 if (n < 0) {
1328 printk("do_io - read failed, err = %d "
1329 "fd = %d\n", -n, req->fds[bit]);
1330 req->error = 1;
1331 return;
1332 }
1333 } while((n < len) && (n != 0));
1334 if (n < len) memset(&buf[n], 0, len - n);
Paolo 'Blaisorblade' Giarrusso2c49be92005-05-01 08:58:57 -07001335 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 n = os_write_file(req->fds[bit], buf, len);
1337 if(n != len){
1338 printk("do_io - write failed err = %d "
1339 "fd = %d\n", -n, req->fds[bit]);
1340 req->error = 1;
1341 return;
1342 }
1343 }
1344
1345 start = end;
1346 } while(start < nsectors);
1347
1348 req->error = update_bitmap(req);
1349}
1350
/* Descriptor on which the io thread receives requests and sends them back.
 * It is written only by start_io_thread, which runs solely from ubd_init
 * (an initcall), so updates to it are serialized.
 */
int kernel_fd = -1;

/* Count of requests picked up so far; modified only by the io thread. */
int io_count = 0;
1358
1359int io_thread(void *arg)
1360{
1361 struct io_thread_req req;
1362 int n;
1363
1364 ignore_sigwinch_sig();
1365 while(1){
1366 n = os_read_file(kernel_fd, &req, sizeof(req));
1367 if(n != sizeof(req)){
1368 if(n < 0)
1369 printk("io_thread - read failed, fd = %d, "
1370 "err = %d\n", kernel_fd, -n);
1371 else {
1372 printk("io_thread - short read, fd = %d, "
1373 "length = %d\n", kernel_fd, n);
1374 }
1375 continue;
1376 }
1377 io_count++;
1378 do_io(&req);
1379 n = os_write_file(kernel_fd, &req, sizeof(req));
1380 if(n != sizeof(req))
1381 printk("io_thread - write failed, fd = %d, err = %d\n",
1382 kernel_fd, -n);
1383 }
1384}
1385
1386/*
1387 * Overrides for Emacs so that we follow Linus's tabbing style.
1388 * Emacs will notice this stuff at the end of the file and automatically
1389 * adjust the settings for this buffer only. This must remain at the end
1390 * of the file.
1391 * ---------------------------------------------------------------------------
1392 * Local variables:
1393 * c-file-style: "linux"
1394 * End:
1395 */